[Mlir-commits] [mlir] [MLIR][Linalg] pack, unpack to take memref inputs (PR #129036)
Hyunsung Lee
llvmlistbot at llvm.org
Sat Apr 12 21:44:08 PDT 2025
https://github.com/ita9naiwa updated https://github.com/llvm/llvm-project/pull/129036
From 4d523adc3cf5eb581c43395e66aaa0012dbc179b Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Thu, 27 Feb 2025 19:54:30 +0900
Subject: [PATCH 01/32] draft
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 72 +++++++++++++++++--
.../Dialect/Linalg/IR/RelayoutOpInterface.td | 1 +
.../mlir/Dialect/Utils/ReshapeOpsUtils.h | 4 +-
mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 2 +-
4 files changed, 69 insertions(+), 10 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 1e48a5e3a20ee..f8a4657c564ce 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -77,7 +77,20 @@ class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
/// with `inner_dims_pos` rather than the packed tensor.
SmallVector<int64_t> getTiledOuterDims();
}];
-
+ let extraClassDeclaration = commonExtraClassDeclaration # [{
+ ShapedType getInputType() {
+ return cast<ShapedType>(getInput().getType());
+ }
+ ShapedType getOutputType() {
+ return cast<ShapedType>(getOutput().getType());
+ }
+ int64_t getInputRank() {
+ return getInputType().getRank();
+ }
+ int64_t getOutputRank() {
+ return getOutputType().getRank();
+ }
+ }];
let hasVerifier = 1;
}
@@ -152,14 +165,14 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
// Note: Only tiled dimensions can be padded.
```
}];
- let arguments = (ins AnyRankedTensor:$source,
- AnyRankedTensor:$dest,
+ let arguments = (ins AnyShaped:$source,
+ AnyShaped:$dest,
Optional<AnyType>:$padding_value,
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
DenseI64ArrayAttr:$inner_dims_pos,
Variadic<Index>:$inner_tiles,
DenseI64ArrayAttr:$static_inner_tiles);
- let results = (outs AnyRankedTensor:$result);
+ let results = (outs AnyShaped:$result);
let assemblyFormat = [{
$source
(`padding_value` `(` $padding_value^ `:` type($padding_value) `)`)?
@@ -179,6 +192,28 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
];
let extraClassDeclaration = commonExtraClassDeclaration # [{
+ Value getOutput() {
+ return getDpsInitOperand(0)->get();
+ }
+
+ // Return the input operand.
+ Value getInput() {
+ return getDpsInputOperand(0)->get();
+ }
+ ShapedType getInputType() {
+ return cast<ShapedType>(getInput().getType());
+ }
+ ShapedType getOutputType() {
+ return cast<ShapedType>(getDest().getType());
+ }
+ int64_t getInputRank() {
+ return getInputType().getRank();
+ }
+ int64_t getOutputRank() {
+ return getOutputType().getRank();
+ }
+
+ LogicalResult generateScalarImplementation(OpBuilder &builder, Location loc, ValueRange ivs);
// Method to get the shape of the result as `SmallVector<OpFoldResult>`.
// This is a static method to allow getting the shape of the destination
// expected while creating a `pack` op.
@@ -229,6 +264,7 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
/// 2. pads the other ones, and
/// 3. doesn't shuffle the dimensions
bool isLikePad();
+
}];
let hasCanonicalizeMethod = 1;
@@ -279,13 +315,13 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
: tensor<8x16x8x32xf32> -> tensor<128x256xf32>
```
}];
- let arguments = (ins AnyRankedTensor:$source,
- AnyRankedTensor:$dest,
+ let arguments = (ins AnyShaped:$source,
+ AnyShaped:$dest,
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$outer_dims_perm,
DenseI64ArrayAttr:$inner_dims_pos,
Variadic<Index>:$inner_tiles,
DenseI64ArrayAttr:$static_inner_tiles);
- let results = (outs AnyRankedTensor:$result);
+ let results = (outs AnyShaped:$result);
let assemblyFormat = [{
$source
(`outer_dims_perm` `=` $outer_dims_perm^)?
@@ -303,6 +339,28 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
];
let extraClassDeclaration = commonExtraClassDeclaration # [{
+ Value getOutput() {
+ return getDpsInitOperand(0)->get();
+ }
+
+ // Return the input operand.
+ Value getInput() {
+ return getDpsInputOperand(0)->get();
+ }
+ ShapedType getInputType() {
+ return cast<ShapedType>(getInput().getType());
+ }
+ ShapedType getOutputType() {
+ return cast<ShapedType>(getDest().getType()); // getDest() 사용
+ }
+ int64_t getInputRank() {
+ return getInputType().getRank();
+ }
+ int64_t getOutputRank() {
+ return getOutputType().getRank();
+ }
+ LogicalResult generateScalarImplementation(OpBuilder &builder, Location loc, ValueRange ivs);
+
static Value createDestinationTensor(OpBuilder &b, Location loc,
Value source, ArrayRef<OpFoldResult> innerTileSizes,
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td
index 2dec2fc4396f4..467d862d277eb 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td
@@ -10,6 +10,7 @@
#define LINALG_IR_RELAYOUTOPINTERFACE
include "mlir/Interfaces/DestinationStyleOpInterface.td"
+include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td"
include "mlir/IR/OpBase.td"
def LinalgRelayoutOpInterface : OpInterface<"RelayoutOpInterface"> {
diff --git a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
index 3af89a6ab3799..a86bf74a7b6a1 100644
--- a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
@@ -451,7 +451,7 @@ getLinearizedDimensions(ArrayRef<ReassociationIndices> reassociationIndices);
/// %4 = tensor.extract_slice %0 [%3#0, %3#1, %3#2, 0] [1, 1, 1, 10] [1, 1, 1, 1] :
/// tensor<3x7x11x10xf32> to tensor<1x1x1x10xf32>
///
-/// %5 = tensor.collapse_shape %4 [[0, 1, 2], [3]] :
+/// %5 = tensor.collapse_shape %4 [[0, 1, 2], [3]] :
/// tensor<1x1x1x10xf32> into tensor<1x10xf32>
/// %6 = tensor.insert_slice %5 into %arg0 [%iv, 0] [1, 10] [1, 1] :
/// tensor<1x10xf32> into tensor<10x10xf32>
@@ -573,7 +573,7 @@ PackingMetadata computePackingMetadata(int64_t packedRank,
/// Removes the op and replaces the constant with a new constant of the result
/// shape. When an optional cst attribute is passed, it is reshaped only if the
/// splat value matches the value in the attribute.
-OpFoldResult reshapeConstantSource(DenseElementsAttr source, TensorType result,
+OpFoldResult reshapeConstantSource(DenseElementsAttr source, ShapedType result,
std::optional<Attribute> cst = std::nullopt);
} // namespace mlir
diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
index 0336423c57b1d..4267732571801 100644
--- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
+++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
@@ -485,7 +485,7 @@ PackingMetadata mlir::computePackingMetadata(int64_t packedRank,
}
OpFoldResult mlir::reshapeConstantSource(DenseElementsAttr source,
- TensorType result,
+ ShapedType result,
std::optional<Attribute> cst) {
if (source && source.isSplat() && result.hasStaticShape() &&
(!cst.has_value() || source.getSplatValue<Attribute>() == cst.value()))
From 4f2dbf4848092942a7932387e39d3c1220d78923 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Fri, 28 Feb 2025 08:00:32 +0900
Subject: [PATCH 02/32] draft
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 44 -------------------
1 file changed, 44 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index f8a4657c564ce..6e2c6171132f5 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -192,28 +192,6 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
];
let extraClassDeclaration = commonExtraClassDeclaration # [{
- Value getOutput() {
- return getDpsInitOperand(0)->get();
- }
-
- // Return the input operand.
- Value getInput() {
- return getDpsInputOperand(0)->get();
- }
- ShapedType getInputType() {
- return cast<ShapedType>(getInput().getType());
- }
- ShapedType getOutputType() {
- return cast<ShapedType>(getDest().getType());
- }
- int64_t getInputRank() {
- return getInputType().getRank();
- }
- int64_t getOutputRank() {
- return getOutputType().getRank();
- }
-
- LogicalResult generateScalarImplementation(OpBuilder &builder, Location loc, ValueRange ivs);
// Method to get the shape of the result as `SmallVector<OpFoldResult>`.
// This is a static method to allow getting the shape of the destination
// expected while creating a `pack` op.
@@ -339,28 +317,6 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
];
let extraClassDeclaration = commonExtraClassDeclaration # [{
- Value getOutput() {
- return getDpsInitOperand(0)->get();
- }
-
- // Return the input operand.
- Value getInput() {
- return getDpsInputOperand(0)->get();
- }
- ShapedType getInputType() {
- return cast<ShapedType>(getInput().getType());
- }
- ShapedType getOutputType() {
- return cast<ShapedType>(getDest().getType()); // getDest() 사용
- }
- int64_t getInputRank() {
- return getInputType().getRank();
- }
- int64_t getOutputRank() {
- return getOutputType().getRank();
- }
- LogicalResult generateScalarImplementation(OpBuilder &builder, Location loc, ValueRange ivs);
-
static Value createDestinationTensor(OpBuilder &b, Location loc,
Value source, ArrayRef<OpFoldResult> innerTileSizes,
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm);
From 226230c9445084671531d755d5c3f5612bed7d67 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Fri, 28 Feb 2025 08:01:05 +0900
Subject: [PATCH 03/32] draft
---
.../mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 6e2c6171132f5..c68c395fc6337 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -77,20 +77,7 @@ class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
/// with `inner_dims_pos` rather than the packed tensor.
SmallVector<int64_t> getTiledOuterDims();
}];
- let extraClassDeclaration = commonExtraClassDeclaration # [{
- ShapedType getInputType() {
- return cast<ShapedType>(getInput().getType());
- }
- ShapedType getOutputType() {
- return cast<ShapedType>(getOutput().getType());
- }
- int64_t getInputRank() {
- return getInputType().getRank();
- }
- int64_t getOutputRank() {
- return getOutputType().getRank();
- }
- }];
+
let hasVerifier = 1;
}
From 0c184dfc85cdb0d89d62aa8cafc4f752e1acc654 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Fri, 28 Feb 2025 09:44:08 +0900
Subject: [PATCH 04/32] init
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 10 +++---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 14 ++++-----
.../Transforms/PackAndUnpackPatterns.cpp | 12 +++----
.../Dialect/Linalg/Transforms/Transforms.cpp | 31 +++++++++++++++----
.../Linalg/Transforms/Vectorization.cpp | 2 +-
mlir/lib/Tools/mlir-opt/launch.json | 13 ++++++++
6 files changed, 57 insertions(+), 25 deletions(-)
create mode 100644 mlir/lib/Tools/mlir-opt/launch.json
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index c68c395fc6337..785c7cc924159 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -43,10 +43,10 @@ class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
code commonExtraClassDeclaration = [{
size_t getSourceRank() { return getSourceType().getRank(); };
size_t getDestRank() { return getDestType().getRank(); };
- RankedTensorType getSourceType() {
- return ::llvm::cast<RankedTensorType>(getSource().getType()); };
- RankedTensorType getDestType() {
- return ::llvm::cast<RankedTensorType>(getDest().getType()); };
+ ShapedType getSourceType() {
+ return ::llvm::cast<ShapedType>(getSource().getType()); };
+ ShapedType getDestType() {
+ return ::llvm::cast<ShapedType>(getDest().getType()); };
MutableOperandRange getDpsInitsMutable() { return getDestMutable(); }
@@ -190,7 +190,7 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
// Method to get the `RankedTensorType` of the result based on the inner
// tiles, position of the inner tiles (innerDimsPos) and interchange vector
// of outer loops (outerDimsPerm).
- static RankedTensorType inferPackedType(RankedTensorType sourceType,
+ static RankedTensorType inferPackedType(ShapedType sourceType,
ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
ArrayRef<int64_t> outerDimsPerm = {});
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 07b19e5cb1a89..f4f08d9d4acf7 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -803,7 +803,7 @@ struct FoldInsertPadIntoFill : public OpRewritePattern<tensor::InsertSliceOp> {
rewriter, loc, addMap, {std::get<0>(p), std::get<1>(p)}));
}
- RankedTensorType srcPadType = srcPadOp.getSourceType();
+ ShapedType srcPadType = srcPadOp.getSourceType();
SmallVector<OpFoldResult, 4> newSizes;
for (int i = 0, e = srcPadType.getRank(); i < e; ++i) {
if (srcPadType.isDynamicDim(i)) {
@@ -4433,7 +4433,7 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
return op->emitError("invalid zero tile factor");
// Verify inner_dims_pos and outer_dims_perm.
- RankedTensorType unpackedType = (std::is_same<OpTy, PackOp>::value)
+ ShapedType unpackedType = (std::is_same<OpTy, PackOp>::value)
? packOrUnPack.getSourceType()
: packOrUnPack.getDestType();
size_t unpackedRank = unpackedType.getRank();
@@ -4747,7 +4747,7 @@ SmallVector<OpFoldResult> PackOp::getResultShape(
/// Get the expected packed type based on source type, tile factors, position of
/// the inner tiles and permutation of the outer tiled loop.
-RankedTensorType PackOp::inferPackedType(RankedTensorType sourceType,
+RankedTensorType PackOp::inferPackedType(ShapedType sourceType,
ArrayRef<int64_t> innerTileSizes,
ArrayRef<int64_t> innerDimsPos,
ArrayRef<int64_t> outerDimsPerm) {
@@ -4943,7 +4943,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
rewriter.create<tensor::CastOp>(loc, newSrcType, packOp.getSource());
}
Value dest = packOp.getDest();
- RankedTensorType originalResultType = packOp.getDestType();
+ ShapedType originalResultType = packOp.getDestType();
bool needUpdateDestType = (destShape != originalResultType.getShape());
if (needUpdateDestType) {
auto newDestType = packOp.getDestType().clone(destShape);
@@ -4953,7 +4953,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
rewriter.modifyOpInPlace(packOp, [&] {
packOp.getSourceMutable().assign(source);
packOp.getDestMutable().assign(dest);
- packOp.getResult().setType(cast<RankedTensorType>(dest.getType()));
+ packOp.getResult().setType(cast<ShapedType>(dest.getType()));
});
// Insert a cast if needed
if (needUpdateDestType) {
@@ -4970,7 +4970,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
template <typename PackOrUnpackOp>
static bool isLikePadUnPad(PackOrUnpackOp packOp,
- RankedTensorType packedTensorType) {
+ ShapedType packedTensorType) {
static_assert(std::is_same<PackOrUnpackOp, PackOp>::value ||
std::is_same<PackOrUnpackOp, UnPackOp>::value,
"Function meant for pack/unpack");
@@ -5274,7 +5274,7 @@ LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
}
bool UnPackOp::isLikeUnPad() {
- RankedTensorType packedTensorType = getSourceType();
+ ShapedType packedTensorType = getSourceType();
return isLikePadUnPad(*this, packedTensorType);
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
index 0984b6988b93b..599aa3b6668df 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
@@ -111,7 +111,7 @@ struct SimplifyPackToExpandShape : public OpRewritePattern<PackOp> {
if (packOp.getPaddingValue())
return rewriter.notifyMatchFailure(packOp, "expects no padding value");
- RankedTensorType sourceType = packOp.getSourceType();
+ ShapedType sourceType = packOp.getSourceType();
if (failed(isPackOnInnerMostDim(rewriter, packOp)) &&
failed(isPackOn1D(rewriter, packOp, sourceType.getShape(),
packOp.getStaticTiles())) &&
@@ -119,7 +119,7 @@ struct SimplifyPackToExpandShape : public OpRewritePattern<PackOp> {
return failure();
}
- RankedTensorType destType = packOp.getDestType();
+ ShapedType destType = packOp.getDestType();
auto reassociation =
getReassociationIndicesForReshape(sourceType, destType);
if (!reassociation)
@@ -157,8 +157,8 @@ struct SimplifyUnPackToCollapseShape : public OpRewritePattern<UnPackOp> {
"expects outer_dims_perm is empty or an identity permutation");
}
- RankedTensorType sourceType = unpackOp.getSourceType();
- RankedTensorType destType = unpackOp.getDestType();
+ ShapedType sourceType = unpackOp.getSourceType();
+ ShapedType destType = unpackOp.getDestType();
if (!sourceType.hasStaticShape() || !destType.hasStaticShape())
return rewriter.notifyMatchFailure(unpackOp, "expects static shapes");
@@ -173,7 +173,7 @@ struct SimplifyUnPackToCollapseShape : public OpRewritePattern<UnPackOp> {
LogicalResult matchAndRewrite(UnPackOp unpackOp,
PatternRewriter &rewriter) const override {
- RankedTensorType destType = unpackOp.getDestType();
+ ShapedType destType = unpackOp.getDestType();
if (failed(isUnpackOnInnerMostDim(rewriter, unpackOp)) &&
failed(isPackOn1D(rewriter, unpackOp, destType.getShape(),
unpackOp.getStaticTiles())) &&
@@ -181,7 +181,7 @@ struct SimplifyUnPackToCollapseShape : public OpRewritePattern<UnPackOp> {
return failure();
}
- RankedTensorType sourceType = unpackOp.getSourceType();
+ ShapedType sourceType = unpackOp.getSourceType();
auto reassociation =
getReassociationIndicesForReshape(sourceType, destType);
if (!reassociation)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index dcd50cc44f81b..7ed211841c53f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -359,7 +359,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unPackOp);
- RankedTensorType packedTensorType = unPackOp.getSourceType();
+ ShapedType packedTensorType = unPackOp.getSourceType();
int64_t packedRank = packedTensorType.getRank();
OpFoldResult zero = rewriter.getIndexAttr(0), one = rewriter.getIndexAttr(1);
@@ -396,10 +396,29 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
applyPermutationToVector(stripMinedShape, packedToStripMinedShapePerm);
// 3. Transpose packedShape to stripMinedShape.
- RankedTensorType stripMinedTensorType =
- RankedTensorType::Builder(packedTensorType).setShape(stripMinedShape);
- RankedTensorType collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
- stripMinedTensorType, packingMetadata.reassociations);
+ ShapedType stripMinedType;
+ if (auto tensorType = packedTensorType.dyn_cast<TensorType>()) {
+ stripMinedType =
+ RankedTensorType::get(stripMinedShape, tensorType.getElementType());
+ } else if (auto memrefType = packedTensorType.dyn_cast<MemRefType>()) {
+ stripMinedType =
+ MemRefType::get(stripMinedShape, memrefType.getElementType());
+ }
+ ShapedType collapsedType;
+ if (stripMinedType.isa<TensorType>()) {
+ collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
+ stripMinedType.cast<RankedTensorType>(),
+ packingMetadata.reassociations);
+ } else if (stripMinedType.isa<MemRefType>()) {
+ auto memrefTy = stripMinedType.cast<MemRefType>();
+ auto tensorTy =
+ RankedTensorType::get(memrefTy.getShape(), memrefTy.getElementType());
+ auto collapsedTensorType = tensor::CollapseShapeOp::inferCollapsedType(
+ tensorTy, packingMetadata.reassociations);
+ // tensor collapsed type을 memref로 재구성 (같은 메모리 공간 유지)
+ collapsedType = MemRefType::get(collapsedTensorType.getShape(),
+ collapsedTensorType.getElementType());
+ }
// Get dynamic dims from input tensor based on packedToStripMinedShapePerm
// permutation.
@@ -407,7 +426,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
tensor::getMixedSizes(rewriter, loc, unPackOp.getSource());
applyPermutationToVector(dims, packedToStripMinedShapePerm);
auto emptyOp = rewriter.create<tensor::EmptyOp>(
- loc, dims, stripMinedTensorType.getElementType());
+ loc, dims, stripMinedType.getElementType());
auto transposeOp = rewriter.create<linalg::TransposeOp>(
loc, unPackOp.getSource(), emptyOp, packedToStripMinedShapePerm);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index ae04c2b6b2a5b..25ad5e38addbe 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1669,7 +1669,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unpackOp);
- RankedTensorType unpackTensorType = unpackOp.getSourceType();
+ ShapedType unpackTensorType = unpackOp.getSourceType();
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
diff --git a/mlir/lib/Tools/mlir-opt/launch.json b/mlir/lib/Tools/mlir-opt/launch.json
new file mode 100644
index 0000000000000..5a686d02e2dfb
--- /dev/null
+++ b/mlir/lib/Tools/mlir-opt/launch.json
@@ -0,0 +1,13 @@
+{
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "ma",
+ "type": "lldb",
+ "request": "launch",
+ "program": "/Users/ita/src/iree-build/tools/iree-opt --show-dialects",
+ "args": [],
+ "cwd": "${workspaceFolder}"
+ }
+ ]
+}
From 19201c69e23578a69583bb98415f9c9583cb5c41 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Fri, 28 Feb 2025 14:50:46 +0900
Subject: [PATCH 05/32] lint
---
mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 1 -
mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 12 ++++++------
mlir/lib/Tools/mlir-opt/launch.json | 13 -------------
3 files changed, 6 insertions(+), 20 deletions(-)
delete mode 100644 mlir/lib/Tools/mlir-opt/launch.json
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 7ed211841c53f..36e01ef46b30b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -415,7 +415,6 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
RankedTensorType::get(memrefTy.getShape(), memrefTy.getElementType());
auto collapsedTensorType = tensor::CollapseShapeOp::inferCollapsedType(
tensorTy, packingMetadata.reassociations);
- // tensor collapsed type을 memref로 재구성 (같은 메모리 공간 유지)
collapsedType = MemRefType::get(collapsedTensorType.getShape(),
collapsedTensorType.getElementType());
}
diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
index 4267732571801..9a2bd3493f6af 100644
--- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
+++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
@@ -315,11 +315,11 @@ SmallVector<Range> SliceFromCollapseHelper::getExtractSliceParams(
// have proven that these are not sliced. In this case we just take
// the full extent of each dimension in the reassociation list.
if (linearizedDimensions[it.index()]) {
- llvm::append_range(
- offsetsSizesAndStrides,
- llvm::map_range(it.value(), [&](int64_t idx) -> Range {
- return {zeroAttr, collapseShapeInputShape[idx], oneAttr};
- }));
+ llvm::append_range(offsetsSizesAndStrides,
+ llvm::map_range(it.value(), [&](int64_t idx) -> Range {
+ return {zeroAttr, collapseShapeInputShape[idx],
+ oneAttr};
+ }));
continue;
}
@@ -485,7 +485,7 @@ PackingMetadata mlir::computePackingMetadata(int64_t packedRank,
}
OpFoldResult mlir::reshapeConstantSource(DenseElementsAttr source,
- ShapedType result,
+ ShapedType result,
std::optional<Attribute> cst) {
if (source && source.isSplat() && result.hasStaticShape() &&
(!cst.has_value() || source.getSplatValue<Attribute>() == cst.value()))
diff --git a/mlir/lib/Tools/mlir-opt/launch.json b/mlir/lib/Tools/mlir-opt/launch.json
deleted file mode 100644
index 5a686d02e2dfb..0000000000000
--- a/mlir/lib/Tools/mlir-opt/launch.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- "version": "0.2.0",
- "configurations": [
- {
- "name": "ma",
- "type": "lldb",
- "request": "launch",
- "program": "/Users/ita/src/iree-build/tools/iree-opt --show-dialects",
- "args": [],
- "cwd": "${workspaceFolder}"
- }
- ]
-}
From b99b92030f2f664607f43554d2b7bc722c98c2c1 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Fri, 28 Feb 2025 15:26:13 +0900
Subject: [PATCH 06/32] lint
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index f4f08d9d4acf7..eca8cea3e6323 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4434,8 +4434,8 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
// Verify inner_dims_pos and outer_dims_perm.
ShapedType unpackedType = (std::is_same<OpTy, PackOp>::value)
- ? packOrUnPack.getSourceType()
- : packOrUnPack.getDestType();
+ ? packOrUnPack.getSourceType()
+ : packOrUnPack.getDestType();
size_t unpackedRank = unpackedType.getRank();
ArrayRef<int64_t> innerDimsPos = packOrUnPack.getInnerDimsPos();
ArrayRef<int64_t> outerDimPerm = packOrUnPack.getOuterDimsPerm();
@@ -4969,8 +4969,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
}
template <typename PackOrUnpackOp>
-static bool isLikePadUnPad(PackOrUnpackOp packOp,
- ShapedType packedTensorType) {
+static bool isLikePadUnPad(PackOrUnpackOp packOp, ShapedType packedTensorType) {
static_assert(std::is_same<PackOrUnpackOp, PackOp>::value ||
std::is_same<PackOrUnpackOp, UnPackOp>::value,
"Function meant for pack/unpack");
From be6a1193579633d7b678a30a9a80e5dee89a51e1 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Fri, 28 Feb 2025 16:19:20 +0900
Subject: [PATCH 07/32] add
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index eca8cea3e6323..a19039fbca67d 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -5001,9 +5001,12 @@ static bool isLikePadUnPad(PackOrUnpackOp packOp, ShapedType packedTensorType) {
}
bool PackOp::isLikePad() {
- auto packedTensorType =
- llvm::cast<RankedTensorType>((*this)->getResultTypes().front());
- return isLikePadUnPad(*this, packedTensorType);
+ if (auto packedTensorType =
+ llvm::dyn_cast<RankedTensorType>((*this)->getResultTypes().front()))
+ return isLikePadUnPad(*this, packedTensorType);
+ if (auto packedTensorType =
+ llvm::dyn_cast<MemRefType>((*this)->getResultTypes().front()))
+ return isLikePadUnPad(*this, packedTensorType);
}
OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
From eee8805c351e7b8100d3e73d1e67c1c06e065962 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Sat, 1 Mar 2025 09:04:26 +0900
Subject: [PATCH 08/32] remove tensor casting
---
.../mlir/Dialect/MemRef/IR/MemRefOps.td | 5 +++
.../Dialect/Linalg/Transforms/Transforms.cpp | 10 ++----
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 32 ++++++++++++++++++-
3 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index 4c8a214049ea9..8bcc1882b454d 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -1799,6 +1799,11 @@ def MemRef_CollapseShapeOp : MemRef_ReassociativeReshapeOp<"collapse_shape", [
static MemRefType computeCollapsedType(
MemRefType srcType, ArrayRef<ReassociationIndices> reassociation);
+ static MemRefType
+ inferCollapsedType(MemRefType type, ArrayRef<AffineMap> reassociation);
+ static MemRefType
+ inferCollapsedType(MemRefType type,
+ SmallVector<ReassociationIndices> reassociation);
}];
let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 36e01ef46b30b..efa0453dda036 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -17,6 +17,7 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
@@ -410,13 +411,8 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
stripMinedType.cast<RankedTensorType>(),
packingMetadata.reassociations);
} else if (stripMinedType.isa<MemRefType>()) {
- auto memrefTy = stripMinedType.cast<MemRefType>();
- auto tensorTy =
- RankedTensorType::get(memrefTy.getShape(), memrefTy.getElementType());
- auto collapsedTensorType = tensor::CollapseShapeOp::inferCollapsedType(
- tensorTy, packingMetadata.reassociations);
- collapsedType = MemRefType::get(collapsedTensorType.getShape(),
- collapsedTensorType.getElementType());
+ collapsedType = memref::CollapseShapeOp::inferCollapsedType(
+ stripMinedType.cast<MemRefType>(), packingMetadata.reassociations);
}
// Get dynamic dims from input tensor based on packedToStripMinedShapePerm
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 11597505e7888..ba12cc34d6457 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -9,6 +9,7 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
@@ -1124,7 +1125,7 @@ struct DimOfMemRefReshape : public OpRewritePattern<DimOp> {
}
} // else dim.getIndex is a block argument to reshape->getBlock and
// dominates reshape
- } // Check condition 2
+ } // Check condition 2
else if (dim->getBlock() != reshape->getBlock() &&
!dim.getIndex().getParentRegion()->isProperAncestor(
reshape->getParentRegion())) {
@@ -2525,6 +2526,35 @@ MemRefType CollapseShapeOp::computeCollapsedType(
srcType.getMemorySpace());
}
+MemRefType
+CollapseShapeOp::inferCollapsedType(MemRefType type,
+ ArrayRef<AffineMap> reassociation) {
+ auto shape = type.getShape();
+ SmallVector<int64_t, 4> newShape;
+ assert(isReassociationValid(reassociation) && "invalid reassociation");
+ unsigned currentDim = 0;
+ for (AffineMap m : reassociation) {
+ unsigned dim = m.getNumResults();
+ auto band = shape.slice(currentDim, dim);
+ int64_t size = 1;
+ if (llvm::is_contained(band, ShapedType::kDynamic))
+ size = ShapedType::kDynamic;
+ else
+ for (unsigned d = 0; d < dim; ++d)
+ size *= shape[currentDim + d];
+ newShape.push_back(size);
+ currentDim += dim;
+ }
+ return MemRefType::get(newShape, type.getElementType());
+}
+
+MemRefType CollapseShapeOp::inferCollapsedType(
+ MemRefType type, SmallVector<ReassociationIndices> reassociation) {
+ return inferCollapsedType(
+ type, getSymbolLessAffineMaps(convertReassociationIndicesToExprs(
+ type.getContext(), reassociation)));
+}
+
void CollapseShapeOp::build(OpBuilder &b, OperationState &result, Value src,
ArrayRef<ReassociationIndices> reassociation,
ArrayRef<NamedAttribute> attrs) {
>From c5b3c3955321ef0e9211226c8fea017bd4b591bf Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Sat, 1 Mar 2025 09:39:30 +0900
Subject: [PATCH 09/32] add test
---
.../lib/Dialect/Linalg/Transforms/Transforms.cpp | 5 ++---
mlir/test/Dialect/Linalg/loops.mlir | 16 ++++++++++++++++
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index efa0453dda036..98dab332b2f40 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -408,11 +408,10 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
ShapedType collapsedType;
if (stripMinedType.isa<TensorType>()) {
collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
- stripMinedType.cast<RankedTensorType>(),
- packingMetadata.reassociations);
+ cast<RankedTensorType>(stripMinedType), packingMetadata.reassociations);
} else if (stripMinedType.isa<MemRefType>()) {
collapsedType = memref::CollapseShapeOp::inferCollapsedType(
- stripMinedType.cast<MemRefType>(), packingMetadata.reassociations);
+ cast<MemRefType>(stripMinedType), packingMetadata.reassociations);
}
// Get dynamic dims from input tensor based on packedToStripMinedShapePerm
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index efe8010cffc91..767f593329f52 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -942,3 +942,19 @@ func.func @transpose(%input: memref<?xf32>,
// CHECKPARALLEL: }
// CHECKPARALLEL: return
// CHECKPARALLEL: }
+
+// Test that we can lower all the way to LLVM without crashing, don't check results here.
+func.func @pack_memref(%source: memref<128x256xf32>) -> memref<8x16x8x32xf32> {
+ %dest = memref.alloc() : memref<8x16x8x32xf32>
+ %packed = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
+ return %packed : memref<8x16x8x32xf32>
+}
+
+// Test that we can lower all the way to LLVM without crashing, don't check results here.
+func.func @unpack_memref(%source: memref<16x8x8x32xf32>) -> memref<128x256xf32> {
+ %dest = memref.alloc() : memref<128x256xf32>
+ %unpacked = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
+ return %unpacked : memref<128x256xf32>
+}
\ No newline at end of file
>From a5d01dffda768947463451af6cab1cf6e282114e Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Sun, 16 Mar 2025 21:21:41 +0900
Subject: [PATCH 10/32] fix upon review
---
.../Dialect/Linalg/IR/RelayoutOpInterface.td | 1 -
.../mlir/Dialect/MemRef/IR/MemRefOps.td | 7 +--
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 14 +++--
.../Transforms/PackAndUnpackPatterns.cpp | 24 +++++---
.../Dialect/Linalg/Transforms/Transforms.cpp | 2 +-
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 56 +++++++++----------
mlir/test/Dialect/Linalg/loops.mlir | 16 ------
mlir/test/Dialect/Linalg/roundtrip.mlir | 18 ++++++
8 files changed, 71 insertions(+), 67 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td b/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td
index 467d862d277eb..2dec2fc4396f4 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/RelayoutOpInterface.td
@@ -10,7 +10,6 @@
#define LINALG_IR_RELAYOUTOPINTERFACE
include "mlir/Interfaces/DestinationStyleOpInterface.td"
-include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td"
include "mlir/IR/OpBase.td"
def LinalgRelayoutOpInterface : OpInterface<"RelayoutOpInterface"> {
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index 87564066d309d..93449766aca4e 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -1782,11 +1782,6 @@ def MemRef_CollapseShapeOp : MemRef_ReassociativeReshapeOp<"collapse_shape", [
static MemRefType computeCollapsedType(
MemRefType srcType, ArrayRef<ReassociationIndices> reassociation);
- static MemRefType
- inferCollapsedType(MemRefType type, ArrayRef<AffineMap> reassociation);
- static MemRefType
- inferCollapsedType(MemRefType type,
- SmallVector<ReassociationIndices> reassociation);
}];
let hasVerifier = 1;
@@ -1806,7 +1801,7 @@ def MemRef_StoreOp : MemRef_Op<"store",
let summary = "store operation";
let description = [{
The `store` op stores an element into a memref at the specified indices.
-
+
The number of indices must match the rank of the memref. The indices must
be in-bounds: `0 <= idx < dim_size`
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index a19039fbca67d..b4cbc7c6ad8e9 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -5001,12 +5001,8 @@ static bool isLikePadUnPad(PackOrUnpackOp packOp, ShapedType packedTensorType) {
}
bool PackOp::isLikePad() {
- if (auto packedTensorType =
- llvm::dyn_cast<RankedTensorType>((*this)->getResultTypes().front()))
- return isLikePadUnPad(*this, packedTensorType);
- if (auto packedTensorType =
- llvm::dyn_cast<MemRefType>((*this)->getResultTypes().front()))
- return isLikePadUnPad(*this, packedTensorType);
+ auto packedTensorType = llvm::dyn_cast<ShapedType>((*this)->getResultTypes().front());
+ return isLikePadUnPad(*this, packedTensorType);
}
OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
@@ -5042,6 +5038,9 @@ struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
if (!tensor::hasFoldableTensorCastOperand(op))
return failure();
+ if (!op.hasPureTensorSemantics())
+ return failure();
+
SmallVector<Type> newResultTypes(op->getResultTypes());
SmallVector<Value> newOperands =
tensor::getUpdatedOperandsAfterCastOpFolding(op, newResultTypes);
@@ -5310,6 +5309,9 @@ struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> {
if (!tensor::hasFoldableTensorCastOperand(op))
return failure();
+ if (!op.hasPureTensorSemantics())
+ return failure();
+
SmallVector<Type> newResultTypes(op->getResultTypes());
SmallVector<Value> newOperands =
tensor::getUpdatedOperandsAfterCastOpFolding(op, newResultTypes);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
index 599aa3b6668df..59e4b2ff634c2 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
@@ -171,25 +171,27 @@ struct SimplifyUnPackToCollapseShape : public OpRewritePattern<UnPackOp> {
return success();
}
- LogicalResult matchAndRewrite(UnPackOp unpackOp,
+ LogicalResult matchAndRewrite(UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
- ShapedType destType = unpackOp.getDestType();
- if (failed(isUnpackOnInnerMostDim(rewriter, unpackOp)) &&
- failed(isPackOn1D(rewriter, unpackOp, destType.getShape(),
- unpackOp.getStaticTiles())) &&
- !unpackOp.isLikeUnPad()) {
+ if (!unPackOp.hasPureTensorSemantics())
+ return failure();
+ ShapedType destType = unPackOp.getDestType();
+ if (failed(isUnpackOnInnerMostDim(rewriter, unPackOp)) &&
+ failed(isPackOn1D(rewriter, unPackOp, destType.getShape(),
+ unPackOp.getStaticTiles())) &&
+ !unPackOp.isLikeUnPad()) {
return failure();
}
- ShapedType sourceType = unpackOp.getSourceType();
+ ShapedType sourceType = unPackOp.getSourceType();
auto reassociation =
getReassociationIndicesForReshape(sourceType, destType);
if (!reassociation)
return failure();
Value collapsed = insertCollapse(
- rewriter, unpackOp.getLoc(), unpackOp.getSource(), destType,
+ rewriter, unPackOp.getLoc(), unPackOp.getSource(), destType,
getReassociationIndicesAttribute(rewriter, *reassociation));
- rewriter.replaceOp(unpackOp, collapsed);
+ rewriter.replaceOp(unPackOp, collapsed);
return success();
}
};
@@ -426,6 +428,8 @@ struct FoldConsumerUnPackWithProducerLinalgTransposeOp
LogicalResult matchAndRewrite(UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
+ if (!unPackOp.hasPureTensorSemantics())
+ return failure();
auto linalgOp = unPackOp.getSource().getDefiningOp<linalg::LinalgOp>();
if (!linalgOp)
return failure();
@@ -507,6 +511,8 @@ struct FoldEmptyTensorWithUnPackOp : public OpRewritePattern<UnPackOp> {
LogicalResult matchAndRewrite(UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
+ if (!unPackOp.hasPureTensorSemantics())
+ return failure();
// Check for tensor.empty source.
auto emptyOp = unPackOp.getSource().getDefiningOp<tensor::EmptyOp>();
if (!emptyOp)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 98dab332b2f40..105831a3d9259 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -410,7 +410,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
cast<RankedTensorType>(stripMinedType), packingMetadata.reassociations);
} else if (stripMinedType.isa<MemRefType>()) {
- collapsedType = memref::CollapseShapeOp::inferCollapsedType(
+ collapsedType = memref::CollapseShapeOp::computeCollapsedType(
cast<MemRefType>(stripMinedType), packingMetadata.reassociations);
}
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index ba12cc34d6457..03c08756d110b 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -2526,34 +2526,34 @@ MemRefType CollapseShapeOp::computeCollapsedType(
srcType.getMemorySpace());
}
-MemRefType
-CollapseShapeOp::inferCollapsedType(MemRefType type,
- ArrayRef<AffineMap> reassociation) {
- auto shape = type.getShape();
- SmallVector<int64_t, 4> newShape;
- assert(isReassociationValid(reassociation) && "invalid reassociation");
- unsigned currentDim = 0;
- for (AffineMap m : reassociation) {
- unsigned dim = m.getNumResults();
- auto band = shape.slice(currentDim, dim);
- int64_t size = 1;
- if (llvm::is_contained(band, ShapedType::kDynamic))
- size = ShapedType::kDynamic;
- else
- for (unsigned d = 0; d < dim; ++d)
- size *= shape[currentDim + d];
- newShape.push_back(size);
- currentDim += dim;
- }
- return MemRefType::get(newShape, type.getElementType());
-}
-
-MemRefType CollapseShapeOp::inferCollapsedType(
- MemRefType type, SmallVector<ReassociationIndices> reassociation) {
- return inferCollapsedType(
- type, getSymbolLessAffineMaps(convertReassociationIndicesToExprs(
- type.getContext(), reassociation)));
-}
+// MemRefType
+// CollapseShapeOp::inferCollapsedType(MemRefType type,
+// ArrayRef<AffineMap> reassociation) {
+// auto shape = type.getShape();
+// SmallVector<int64_t, 4> newShape;
+// assert(isReassociationValid(reassociation) && "invalid reassociation");
+// unsigned currentDim = 0;
+// for (AffineMap m : reassociation) {
+// unsigned dim = m.getNumResults();
+// auto band = shape.slice(currentDim, dim);
+// int64_t size = 1;
+// if (llvm::is_contained(band, ShapedType::kDynamic))
+// size = ShapedType::kDynamic;
+// else
+// for (unsigned d = 0; d < dim; ++d)
+// size *= shape[currentDim + d];
+// newShape.push_back(size);
+// currentDim += dim;
+// }
+// return MemRefType::get(newShape, type.getElementType());
+// }
+
+// MemRefType CollapseShapeOp::inferCollapsedType(
+// MemRefType type, SmallVector<ReassociationIndices> reassociation) {
+// return inferCollapsedType(
+// type, getSymbolLessAffineMaps(convertReassociationIndicesToExprs(
+// type.getContext(), reassociation)));
+// }
void CollapseShapeOp::build(OpBuilder &b, OperationState &result, Value src,
ArrayRef<ReassociationIndices> reassociation,
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
index 767f593329f52..efe8010cffc91 100644
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -942,19 +942,3 @@ func.func @transpose(%input: memref<?xf32>,
// CHECKPARALLEL: }
// CHECKPARALLEL: return
// CHECKPARALLEL: }
-
-// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @pack_memref(%source: memref<128x256xf32>) -> memref<8x16x8x32xf32> {
- %dest = memref.alloc() : memref<8x16x8x32xf32>
- %packed = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
- into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
- return %packed : memref<8x16x8x32xf32>
-}
-
-// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @unpack_memref(%source: memref<16x8x8x32xf32>) -> memref<128x256xf32> {
- %dest = memref.alloc() : memref<128x256xf32>
- %unpacked = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
- into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
- return %unpacked : memref<128x256xf32>
-}
\ No newline at end of file
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index dc556761b09e5..7f7aa12534a9b 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -706,3 +706,21 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt:
// CHECK-LABEL: func @conv2d_channel_first_q_promote(
// CHECK: %[[arg0:[a-zA-z0-9]*]]: tensor<100x3x224x224xi8>, %[[arg1:[a-zA-z0-9]*]]: tensor<64x3x5x5xi8>, %[[arg2:[a-zA-z0-9]*]]: i8, %[[arg3:[a-zA-z0-9]*]]: i8)
// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>
+
+// -----
+// Test that we can lower all the way to LLVM without crashing, don't check results here.
+func.func @pack_memref(%source: memref<128x256xf32>) -> memref<8x16x8x32xf32> {
+ %dest = memref.alloc() : memref<8x16x8x32xf32>
+ %packed = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
+ return %packed : memref<8x16x8x32xf32>
+}
+
+// -----
+// Test that we can lower all the way to LLVM without crashing, don't check results here.
+func.func @unpack_memref(%source: memref<16x8x8x32xf32>) -> memref<128x256xf32> {
+ %dest = memref.alloc() : memref<128x256xf32>
+ %unpacked = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
+ return %unpacked : memref<128x256xf32>
+}
>From 2480616ebfbb968d83ab119bf7d6a84897f482e5 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Sun, 23 Mar 2025 15:09:40 +0900
Subject: [PATCH 11/32] lint
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 3 ++-
mlir/test/Dialect/Linalg/roundtrip.mlir | 8 ++++----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index b4cbc7c6ad8e9..8d71cc0142556 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -5001,7 +5001,8 @@ static bool isLikePadUnPad(PackOrUnpackOp packOp, ShapedType packedTensorType) {
}
bool PackOp::isLikePad() {
- auto packedTensorType = llvm::dyn_cast<ShapedType>((*this)->getResultTypes().front());
+ auto packedTensorType =
+ llvm::dyn_cast<ShapedType>((*this)->getResultTypes().front());
return isLikePadUnPad(*this, packedTensorType);
}
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 7f7aa12534a9b..c2e9e3fbd5423 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -711,16 +711,16 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt:
// Test that we can lower all the way to LLVM without crashing, don't check results here.
func.func @pack_memref(%source: memref<128x256xf32>) -> memref<8x16x8x32xf32> {
%dest = memref.alloc() : memref<8x16x8x32xf32>
- %packed = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
- return %packed : memref<8x16x8x32xf32>
+ return %dest : memref<8x16x8x32xf32>
}
// -----
// Test that we can lower all the way to LLVM without crashing, don't check results here.
func.func @unpack_memref(%source: memref<16x8x8x32xf32>) -> memref<128x256xf32> {
%dest = memref.alloc() : memref<128x256xf32>
- %unpacked = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
- return %unpacked : memref<128x256xf32>
+ return %dest : memref<128x256xf32>
}
>From 7b92a4ee2af6c15035dbb5824f23f2524c7aa1a3 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Mon, 24 Mar 2025 10:37:02 +0900
Subject: [PATCH 12/32] format fix
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 1 -
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 29 -------------------
2 files changed, 30 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 785c7cc924159..63d36ec1fd3d6 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -229,7 +229,6 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
/// 2. pads the other ones, and
/// 3. doesn't shuffle the dimensions
bool isLikePad();
-
}];
let hasCanonicalizeMethod = 1;
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 70d44904788b1..dbd3f6d631a8a 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -2526,35 +2526,6 @@ MemRefType CollapseShapeOp::computeCollapsedType(
srcType.getMemorySpace());
}
-// MemRefType
-// CollapseShapeOp::inferCollapsedType(MemRefType type,
-// ArrayRef<AffineMap> reassociation) {
-// auto shape = type.getShape();
-// SmallVector<int64_t, 4> newShape;
-// assert(isReassociationValid(reassociation) && "invalid reassociation");
-// unsigned currentDim = 0;
-// for (AffineMap m : reassociation) {
-// unsigned dim = m.getNumResults();
-// auto band = shape.slice(currentDim, dim);
-// int64_t size = 1;
-// if (llvm::is_contained(band, ShapedType::kDynamic))
-// size = ShapedType::kDynamic;
-// else
-// for (unsigned d = 0; d < dim; ++d)
-// size *= shape[currentDim + d];
-// newShape.push_back(size);
-// currentDim += dim;
-// }
-// return MemRefType::get(newShape, type.getElementType());
-// }
-
-// MemRefType CollapseShapeOp::inferCollapsedType(
-// MemRefType type, SmallVector<ReassociationIndices> reassociation) {
-// return inferCollapsedType(
-// type, getSymbolLessAffineMaps(convertReassociationIndicesToExprs(
-// type.getContext(), reassociation)));
-// }
-
void CollapseShapeOp::build(OpBuilder &b, OperationState &result, Value src,
ArrayRef<ReassociationIndices> reassociation,
ArrayRef<NamedAttribute> attrs) {
>From 6dc08ae1628ab2c5795f17af1a3b1ff682e5d861 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Tue, 25 Mar 2025 14:58:06 +0900
Subject: [PATCH 13/32] revert changes
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 9 ++++++-
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 6 +++++
.../Dialect/Linalg/Transforms/Transforms.cpp | 27 +++++--------------
.../Linalg/Transforms/Vectorization.cpp | 2 +-
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 3 +--
mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 10 +++----
6 files changed, 28 insertions(+), 29 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 63d36ec1fd3d6..03da3d38ef4c5 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -34,7 +34,7 @@ class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
Op<Linalg_Dialect, mnemonic, !listconcat(traits, [
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
DestinationStyleOpInterface, LinalgRelayoutOpInterface,
- ConditionallySpeculatable, NoMemoryEffect,
+ ConditionallySpeculatable, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
TypesMatchWith<"result type matches type of dest",
"dest", "result",
@@ -76,6 +76,13 @@ class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
/// have been tiled. Also, the order of the output dimensions is consistent
/// with `inner_dims_pos` rather than the packed tensor.
SmallVector<int64_t> getTiledOuterDims();
+
+ void $cppClass::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ getGenericEffectsImpl(effects, cast<LinalgOp>(getOperation()));
+ }
+
}];
let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 9766c6e56fb7c..1515d648bddca 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4822,6 +4822,9 @@ bool areTilesAndTiledDimsAllConstant(OpTy op) {
}
Speculation::Speculatability PackOp::getSpeculatability() {
+ if (!hasPureTensorSemantics())
+ return Speculation::NotSpeculatable;
+
if (getPaddingValue())
return Speculation::Speculatable;
@@ -5122,6 +5125,9 @@ LogicalResult UnPackOp::verify() {
}
Speculation::Speculatability UnPackOp::getSpeculatability() {
+ if (!hasPureTensorSemantics())
+ return Speculation::NotSpeculatable;
+
// See PackOp::getSpeculatability.
if (!areTilesAndTiledDimsAllConstant(*this))
return Speculation::NotSpeculatable;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 105831a3d9259..085d6e44d854d 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -17,7 +17,6 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
@@ -360,7 +359,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unPackOp);
- ShapedType packedTensorType = unPackOp.getSourceType();
+ RankedTensorType packedTensorType = unPackOp.getSourceType();
int64_t packedRank = packedTensorType.getRank();
OpFoldResult zero = rewriter.getIndexAttr(0), one = rewriter.getIndexAttr(1);
@@ -397,22 +396,10 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
applyPermutationToVector(stripMinedShape, packedToStripMinedShapePerm);
// 3. Transpose packedShape to stripMinedShape.
- ShapedType stripMinedType;
- if (auto tensorType = packedTensorType.dyn_cast<TensorType>()) {
- stripMinedType =
- RankedTensorType::get(stripMinedShape, tensorType.getElementType());
- } else if (auto memrefType = packedTensorType.dyn_cast<MemRefType>()) {
- stripMinedType =
- MemRefType::get(stripMinedShape, memrefType.getElementType());
- }
- ShapedType collapsedType;
- if (stripMinedType.isa<TensorType>()) {
- collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
- cast<RankedTensorType>(stripMinedType), packingMetadata.reassociations);
- } else if (stripMinedType.isa<MemRefType>()) {
- collapsedType = memref::CollapseShapeOp::computeCollapsedType(
- cast<MemRefType>(stripMinedType), packingMetadata.reassociations);
- }
+ RankedTensorType stripMinedTensorType =
+ RankedTensorType::Builder(packedTensorType).setShape(stripMinedShape);
+ RankedTensorType collapsedType = tensor::CollapseShapeOp::inferCollapsedType(
+ stripMinedTensorType, packingMetadata.reassociations);
// Get dynamic dims from input tensor based on packedToStripMinedShapePerm
// permutation.
@@ -420,7 +407,7 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
tensor::getMixedSizes(rewriter, loc, unPackOp.getSource());
applyPermutationToVector(dims, packedToStripMinedShapePerm);
auto emptyOp = rewriter.create<tensor::EmptyOp>(
- loc, dims, stripMinedType.getElementType());
+ loc, dims, stripMinedTensorType.getElementType());
auto transposeOp = rewriter.create<linalg::TransposeOp>(
loc, unPackOp.getSource(), emptyOp, packedToStripMinedShapePerm);
@@ -1675,4 +1662,4 @@ void linalg::populateDecomposePackUnpackPatterns(RewritePatternSet &patterns) {
void linalg::populateDecomposePadPatterns(RewritePatternSet &patterns) {
patterns.add<DecomposePadOpPattern>(patterns.getContext());
-}
+}
\ No newline at end of file
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index dfb3f0c90595d..2dcd897330d1e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1669,7 +1669,7 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unpackOp);
- ShapedType unpackTensorType = unpackOp.getSourceType();
+ RankedTensorType unpackTensorType = unpackOp.getSourceType();
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index dbd3f6d631a8a..1a584a387f2a5 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -9,7 +9,6 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
@@ -1125,7 +1124,7 @@ struct DimOfMemRefReshape : public OpRewritePattern<DimOp> {
}
} // else dim.getIndex is a block argument to reshape->getBlock and
// dominates reshape
- } // Check condition 2
+ } // Check condition 2
else if (dim->getBlock() != reshape->getBlock() &&
!dim.getIndex().getParentRegion()->isProperAncestor(
reshape->getParentRegion())) {
diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
index 9a2bd3493f6af..cd0cdd378c352 100644
--- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
+++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
@@ -315,11 +315,11 @@ SmallVector<Range> SliceFromCollapseHelper::getExtractSliceParams(
// have proven that these are not sliced. In this case we just take
// the full extent of each dimension in the reassociation list.
if (linearizedDimensions[it.index()]) {
- llvm::append_range(offsetsSizesAndStrides,
- llvm::map_range(it.value(), [&](int64_t idx) -> Range {
- return {zeroAttr, collapseShapeInputShape[idx],
- oneAttr};
- }));
+ llvm::append_range(
+ offsetsSizesAndStrides,
+ llvm::map_range(it.value(), [&](int64_t idx) -> Range {
+ return {zeroAttr, collapseShapeInputShape[idx], oneAttr};
+ }));
continue;
}
>From cf7be5780250547577c8eca7c0c021f9590516a9 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Tue, 25 Mar 2025 15:03:54 +0900
Subject: [PATCH 14/32] revert changes
---
mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 2 +-
mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 085d6e44d854d..dcd50cc44f81b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -1662,4 +1662,4 @@ void linalg::populateDecomposePackUnpackPatterns(RewritePatternSet &patterns) {
void linalg::populateDecomposePadPatterns(RewritePatternSet &patterns) {
patterns.add<DecomposePadOpPattern>(patterns.getContext());
-}
\ No newline at end of file
+}
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 1a584a387f2a5..59434dccc117b 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -1124,7 +1124,7 @@ struct DimOfMemRefReshape : public OpRewritePattern<DimOp> {
}
} // else dim.getIndex is a block argument to reshape->getBlock and
// dominates reshape
- } // Check condition 2
+ } // Check condition 2
else if (dim->getBlock() != reshape->getBlock() &&
!dim.getIndex().getParentRegion()->isProperAncestor(
reshape->getParentRegion())) {
>From 4e2f00de633fbde83d6cc967c442c75d809f0536 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Tue, 25 Mar 2025 15:45:58 +0900
Subject: [PATCH 15/32] nit
---
mlir/test/Dialect/Linalg/roundtrip.mlir | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index c2e9e3fbd5423..d8e11d03bedd4 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -709,7 +709,7 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt:
// -----
// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @pack_memref(%source: memref<128x256xf32>) -> memref<8x16x8x32xf32> {
+func.func @pack_memref(%source: memref<128x256xf32>, memref<8x16x8x32xf32>) -> memref<8x16x8x32xf32> {
%dest = memref.alloc() : memref<8x16x8x32xf32>
linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
@@ -718,8 +718,7 @@ func.func @pack_memref(%source: memref<128x256xf32>) -> memref<8x16x8x32xf32> {
// -----
// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @unpack_memref(%source: memref<16x8x8x32xf32>) -> memref<128x256xf32> {
- %dest = memref.alloc() : memref<128x256xf32>
+func.func @unpack_memref(%source: memref<16x8x8x32xf32>, %dest: memref<128x256xf32>) -> memref<128x256xf32> {
linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
return %dest : memref<128x256xf32>
>From ee7a42a0c739bd4c56d0ce82318199ea01874491 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Thu, 27 Mar 2025 14:09:08 +0900
Subject: [PATCH 16/32] fix upon review: Add getEffects for PackOp and UnPackOp
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 7 ---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 54 +++++++++++++++++++
.../Dialect/Linalg/Transforms/Transforms.cpp | 3 +-
.../Linalg/Transforms/Vectorization.cpp | 3 +-
mlir/test/Dialect/Linalg/roundtrip.mlir | 3 +-
5 files changed, 59 insertions(+), 11 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 03da3d38ef4c5..980e99872b9a6 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -76,13 +76,6 @@ class Linalg_RelayoutOp<string mnemonic, list<Trait> traits = []> :
/// have been tiled. Also, the order of the output dimensions is consistent
/// with `inner_dims_pos` rather than the packed tensor.
SmallVector<int64_t> getTiledOuterDims();
-
- void $cppClass::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
- getGenericEffectsImpl(effects, cast<LinalgOp>(getOperation()));
- }
-
}];
let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 1515d648bddca..93ca2581f2a3d 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4803,6 +4803,60 @@ PackOp PackOp::createTransposedClone(OpBuilder &b, Location loc,
getPaddingValue(), metadata.outerDimsPerm);
}
+void PackOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ // No memory effects for pure tensor semantics
+ if (hasPureTensorSemantics())
+ return;
+
+ for (OpOperand &opOperand : getOperation()->getOpOperands()) {
+ if (!llvm::isa<MemRefType>(opOperand.get().getType()))
+ continue;
+
+ if (&opOperand == &getSourceMutable()) {
+ effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ }
+ else if (&opOperand == &getDestMutable()) {
+ effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ effects.emplace_back(MemoryEffects::Write::get(), &opOperand, /*stage=*/0,
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ }
+ }
+}
+
+void UnPackOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ // No memory effects for pure tensor semantics
+ if (hasPureTensorSemantics())
+ return;
+
+ for (OpOperand &opOperand : getOperation()->getOpOperands()) {
+ if (!llvm::isa<MemRefType>(opOperand.get().getType()))
+ continue;
+
+ if (&opOperand == &getSourceMutable()) {
+ effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ }
+ else if (&opOperand == &getDestMutable()) {
+ effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ effects.emplace_back(MemoryEffects::Write::get(), &opOperand, /*stage=*/0,
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ }
+ }
+}
+
/// Returns true if the tiles and the tiled dims are constant.
template <typename OpTy>
bool areTilesAndTiledDimsAllConstant(OpTy op) {
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index dcd50cc44f81b..2ae6474cf3a2f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -359,7 +359,8 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unPackOp);
- RankedTensorType packedTensorType = unPackOp.getSourceType();
+ // TODO: support non-ranked tensor types. ShapedType
+ RankedTensorType packedTensorType = dyn_cast<RankedTensorType>(unPackOp.getSourceType());
int64_t packedRank = packedTensorType.getRank();
OpFoldResult zero = rewriter.getIndexAttr(0), one = rewriter.getIndexAttr(1);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 2dcd897330d1e..3b91b897bcfd4 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1669,7 +1669,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unpackOp);
- RankedTensorType unpackTensorType = unpackOp.getSourceType();
+ // TODO: support non-ranked tensor types. ShapedType
+ RankedTensorType unpackTensorType = dyn_cast<RankedTensorType>(unpackOp.getSourceType());
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index d8e11d03bedd4..7ca20f684583a 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -709,8 +709,7 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt:
// -----
// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @pack_memref(%source: memref<128x256xf32>, memref<8x16x8x32xf32>) -> memref<8x16x8x32xf32> {
- %dest = memref.alloc() : memref<8x16x8x32xf32>
+func.func @pack_memref(%source: memref<128x256xf32>, %dest: memref<8x16x8x32xf32>) -> memref<8x16x8x32xf32> {
linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
return %dest : memref<8x16x8x32xf32>
>From 5b95ee88d4bd1e4304c73383c3c03308598d0ae6 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Thu, 27 Mar 2025 14:15:52 +0900
Subject: [PATCH 17/32] make clang-format happy
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 30 +++++++++----------
.../Dialect/Linalg/Transforms/Transforms.cpp | 3 +-
2 files changed, 16 insertions(+), 17 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 93ca2581f2a3d..7587178dd94d2 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4816,16 +4816,15 @@ void PackOp::getEffects(
if (&opOperand == &getSourceMutable()) {
effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
- }
- else if (&opOperand == &getDestMutable()) {
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ } else if (&opOperand == &getDestMutable()) {
effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
effects.emplace_back(MemoryEffects::Write::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
}
}
}
@@ -4843,16 +4842,15 @@ void UnPackOp::getEffects(
if (&opOperand == &getSourceMutable()) {
effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
- }
- else if (&opOperand == &getDestMutable()) {
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
+ } else if (&opOperand == &getDestMutable()) {
effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
effects.emplace_back(MemoryEffects::Write::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
+ /*effectOnFullRegion=*/true,
+ SideEffects::DefaultResource::get());
}
}
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 2ae6474cf3a2f..75afcb1fec332 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -360,7 +360,8 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
rewriter.setInsertionPoint(unPackOp);
// TODO: support non-ranked tensor types. ShapedType
- RankedTensorType packedTensorType = dyn_cast<RankedTensorType>(unPackOp.getSourceType());
+ RankedTensorType packedTensorType =
+ dyn_cast<RankedTensorType>(unPackOp.getSourceType());
int64_t packedRank = packedTensorType.getRank();
OpFoldResult zero = rewriter.getIndexAttr(0), one = rewriter.getIndexAttr(1);
>From 8b5ac5abd85b35ced34839b955247103341dd9a0 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Thu, 27 Mar 2025 14:21:30 +0900
Subject: [PATCH 18/32] make clang-format happy
---
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 3b91b897bcfd4..f716ff97f7cf3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1670,7 +1670,8 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
rewriter.setInsertionPoint(unpackOp);
// TODO: support non-ranked tensor types. ShapedType
- RankedTensorType unpackTensorType = dyn_cast<RankedTensorType>(unpackOp.getSourceType());
+ RankedTensorType unpackTensorType =
+ dyn_cast<RankedTensorType>(unpackOp.getSourceType());
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
>From c955d2137b454af779dedb12cd933da529140846 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Fri, 28 Mar 2025 07:34:26 +0900
Subject: [PATCH 19/32] wrap getEffects function
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 43 +++++++++---------------
1 file changed, 15 insertions(+), 28 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 7587178dd94d2..63977d7165e36 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4803,22 +4803,23 @@ PackOp PackOp::createTransposedClone(OpBuilder &b, Location loc,
getPaddingValue(), metadata.outerDimsPerm);
}
-void PackOp::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
+template <typename OpTy>
+static void getEffectsImpl(
+ OpTy op, SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
// No memory effects for pure tensor semantics
- if (hasPureTensorSemantics())
+ if (op.hasPureTensorSemantics())
return;
- for (OpOperand &opOperand : getOperation()->getOpOperands()) {
+ for (OpOperand &opOperand : op.getOperation()->getOpOperands()) {
if (!llvm::isa<MemRefType>(opOperand.get().getType()))
continue;
- if (&opOperand == &getSourceMutable()) {
+ if (&opOperand == &op.getSourceMutable()) {
effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
/*effectOnFullRegion=*/true,
SideEffects::DefaultResource::get());
- } else if (&opOperand == &getDestMutable()) {
+ } else if (&opOperand == &op.getDestMutable()) {
effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
/*effectOnFullRegion=*/true,
SideEffects::DefaultResource::get());
@@ -4829,30 +4830,16 @@ void PackOp::getEffects(
}
}
-void UnPackOp::getEffects(
+void PackOp::getEffects(
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
&effects) {
- // No memory effects for pure tensor semantics
- if (hasPureTensorSemantics())
- return;
-
- for (OpOperand &opOperand : getOperation()->getOpOperands()) {
- if (!llvm::isa<MemRefType>(opOperand.get().getType()))
- continue;
+ getEffectsImpl(*this, effects);
+}
- if (&opOperand == &getSourceMutable()) {
- effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
- } else if (&opOperand == &getDestMutable()) {
- effects.emplace_back(MemoryEffects::Read::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
- effects.emplace_back(MemoryEffects::Write::get(), &opOperand, /*stage=*/0,
- /*effectOnFullRegion=*/true,
- SideEffects::DefaultResource::get());
- }
- }
+void UnPackOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ getEffectsImpl(*this, effects);
}
/// Returns true if the tiles and the tiled dims are constant.
>From 276069d36b4bb88b628d2b29f20f6c85e76aa931 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Sun, 30 Mar 2025 08:47:51 +0900
Subject: [PATCH 20/32] fix upon review
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 9 +-
.../mlir/Dialect/Utils/ReshapeOpsUtils.h | 2 +-
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 101 +++++++++++++-----
.../Transforms/DataLayoutPropagation.cpp | 4 +-
mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp | 12 +--
mlir/test/Dialect/Linalg/roundtrip.mlir | 10 +-
6 files changed, 96 insertions(+), 42 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index 980e99872b9a6..bd9caa3f6b1a7 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -190,7 +190,14 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
// Method to get the `RankedTensorType` of the result based on the inner
// tiles, position of the inner tiles (innerDimsPos) and interchange vector
// of outer loops (outerDimsPerm).
- static RankedTensorType inferPackedType(ShapedType sourceType,
+ static RankedTensorType inferPackedTensorType(RankedTensorType sourceType,
+ ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm = {});
+
+ // Method to get the `MemRefType` of the result based on the inner
+ // tiles, position of the inner tiles (innerDimsPos) and interchange vector
+ // of outer loops (outerDimsPerm).
+ static MemRefType inferPackedMemRefType(MemRefType sourceType,
ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
ArrayRef<int64_t> outerDimsPerm = {});
diff --git a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
index a86bf74a7b6a1..99c80a2196567 100644
--- a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
@@ -573,7 +573,7 @@ PackingMetadata computePackingMetadata(int64_t packedRank,
/// Removes the op and replaces the constant with a new constant of the result
/// shape. When an optional cst attribute is passed, it is reshaped only if the
/// splat value matches the value in the attribute.
-OpFoldResult reshapeConstantSource(DenseElementsAttr source, ShapedType result,
+OpFoldResult reshapeConstantSource(DenseElementsAttr source, TensorType result,
std::optional<Attribute> cst = std::nullopt);
} // namespace mlir
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index afff911168324..0af14b12da040 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -9,8 +9,8 @@
// This file implements the Linalg operations.
//
//===----------------------------------------------------------------------===//
-
#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include <iostream>
#include "mlir/AsmParser/AsmParser.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -29,6 +29,7 @@
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Attributes.h"
+#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "mlir/IR/Matchers.h"
@@ -45,6 +46,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/MathExtras.h"
@@ -4426,15 +4428,30 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
tiles, [](OpFoldResult tile) { return isConstantIntValue(tile, 0); });
};
+ // Verify that the source and destination are ranked types.
+ if (!packOrUnPack.getSourceType().hasRank() ||
+ !packOrUnPack.getDestType().hasRank()) {
+ return op->emitError(
+ "expected both source and destination to be shaped types");
+ }
+
// Verify tiles. Do not allow zero tiles.
SmallVector<OpFoldResult> mixedTiles = packOrUnPack.getMixedTiles();
if (hasZeros(mixedTiles))
return op->emitError("invalid zero tile factor");
+ // Verify that the Operation does not have mixed tensor/buffer semantics.
+ if (!packOrUnPack.hasPureBufferSemantics() &&
+ !packOrUnPack.hasPureTensorSemantics()) {
+ return op->emitError("mixing tensor and buffer semantics is not allowed");
+ }
+ bool hasTensorSemantics = packOrUnPack.hasPureTensorSemantics();
+
// Verify inner_dims_pos and outer_dims_perm.
ShapedType unpackedType = (std::is_same<OpTy, PackOp>::value)
? packOrUnPack.getSourceType()
: packOrUnPack.getDestType();
+
size_t unpackedRank = unpackedType.getRank();
ArrayRef<int64_t> innerDimsPos = packOrUnPack.getInnerDimsPos();
ArrayRef<int64_t> outerDimPerm = packOrUnPack.getOuterDimsPerm();
@@ -4471,12 +4488,17 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
// Verify result shape is greater than the minimum expected
// by the pack operation, and that the output shape
// represents full tiles.
- RankedTensorType expectedPackedType = PackOp::inferPackedType(
- unpackedType, packOrUnPack.getStaticTiles(), innerDimsPos, outerDimPerm);
- if (!areAllInBound(expectedPackedType.getShape(), packedType.getShape())) {
- return op->emitError("the shape of output is not large enough to hold the "
- "packed data. Expected at least ")
- << expectedPackedType << ", got " << packedType;
+ if (hasTensorSemantics) {
+ RankedTensorType expectedPackedType = PackOp::inferPackedTensorType(
+ cast<RankedTensorType>(unpackedType), packOrUnPack.getStaticTiles(),
+ innerDimsPos, outerDimPerm);
+ if (!areAllInBound(expectedPackedType.getShape(), packedType.getShape())) {
+ return op->emitError(
+ "the shape of output is not large enough to hold the "
+ "packed data. Expected at least ")
+ << expectedPackedType << ", got " << packedType;
+ }
+ } else {
}
if (!llvm::all_of(
llvm::zip(packedType.getShape().take_back(mixedTiles.size()),
@@ -4680,9 +4702,9 @@ asShapeWithAnyValueAsDynamic(ArrayRef<OpFoldResult> ofrs) {
return result;
}
-/// Helper for PackOp::{getResultShape,inferPackedType}. Returns the shape of
-/// the packed type. Having a shared helper helps implement these two methods in
-/// a way that ensures that they agree on which dimensions are dynamic.
+/// Helper for PackOp::{getResultShape,inferPackedTensorType}. Returns the shape
+/// of the packed type. Having a shared helper helps implement these two methods
+/// in a way that ensures that they agree on which dimensions are dynamic.
static SmallVector<int64_t> getPackOpResultTypeShape(
ArrayRef<int64_t> sourceShape, ArrayRef<int64_t> innerTileSizes,
ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm) {
@@ -4746,13 +4768,21 @@ SmallVector<OpFoldResult> PackOp::getResultShape(
/// Get the expected packed type based on source type, tile factors, position of
/// the inner tiles and permutation of the outer tiled loop.
-RankedTensorType PackOp::inferPackedType(ShapedType sourceType,
+RankedTensorType PackOp::inferPackedTensorType(
+ RankedTensorType sourceType, ArrayRef<int64_t> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos, ArrayRef<int64_t> outerDimsPerm) {
+ SmallVector<int64_t> resultShape = getPackOpResultTypeShape(
+ sourceType.getShape(), innerTileSizes, innerDimsPos, outerDimsPerm);
+ return RankedTensorType::get(resultShape, sourceType.getElementType());
+}
+
+MemRefType PackOp::inferPackedMemRefType(MemRefType sourceType,
ArrayRef<int64_t> innerTileSizes,
ArrayRef<int64_t> innerDimsPos,
ArrayRef<int64_t> outerDimsPerm) {
SmallVector<int64_t> resultShape = getPackOpResultTypeShape(
sourceType.getShape(), innerTileSizes, innerDimsPos, outerDimsPerm);
- return RankedTensorType::get(resultShape, sourceType.getElementType());
+ return MemRefType::get(resultShape, sourceType.getElementType());
}
Value PackOp::createDestinationTensor(OpBuilder &b, Location loc, Value source,
@@ -4802,7 +4832,7 @@ PackOp PackOp::createTransposedClone(OpBuilder &b, Location loc,
}
template <typename OpTy>
-static void getEffectsImpl(
+static void getPackUnPackEffectsImpl(
OpTy op, SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
&effects) {
// No memory effects for pure tensor semantics
@@ -4831,13 +4861,13 @@ static void getEffectsImpl(
void PackOp::getEffects(
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
&effects) {
- getEffectsImpl(*this, effects);
+ getPackUnPackEffectsImpl(*this, effects);
}
void UnPackOp::getEffects(
SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
&effects) {
- getEffectsImpl(*this, effects);
+ getPackUnPackEffectsImpl(*this, effects);
}
/// Returns true if the tiles and the tiled dims are constant.
@@ -4972,35 +5002,49 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
return success();
}
- // Insert tensor.cast ops if static shape inference is available..
+ // Insert either tensor.cast or memref.cast ops
+ // if static shape inference is available.
+ bool hasTensorSemantics = packOp.hasPureTensorSemantics();
+
SmallVector<int64_t> srcShape, destShape;
if (inferStaticShape(packOp, srcShape, destShape)) {
Location loc = packOp.getLoc();
Value source = packOp.getSource();
if (srcShape != packOp.getSourceType().getShape()) {
auto newSrcType = packOp.getSourceType().clone(srcShape);
- source =
- rewriter.create<tensor::CastOp>(loc, newSrcType, packOp.getSource());
+ if (hasTensorSemantics)
+ source = rewriter.create<tensor::CastOp>(loc, newSrcType,
+ packOp.getSource());
+ else
+ source = rewriter.create<memref::CastOp>(loc, newSrcType,
+ packOp.getSource());
}
Value dest = packOp.getDest();
ShapedType originalResultType = packOp.getDestType();
bool needUpdateDestType = (destShape != originalResultType.getShape());
if (needUpdateDestType) {
auto newDestType = packOp.getDestType().clone(destShape);
- dest =
- rewriter.create<tensor::CastOp>(loc, newDestType, packOp.getDest());
+ if (hasTensorSemantics)
+ dest =
+ rewriter.create<tensor::CastOp>(loc, newDestType, packOp.getDest());
}
rewriter.modifyOpInPlace(packOp, [&] {
packOp.getSourceMutable().assign(source);
packOp.getDestMutable().assign(dest);
- packOp.getResult().setType(cast<ShapedType>(dest.getType()));
+ packOp.getResult().setType(cast<RankedTensorType>(dest.getType()));
});
// Insert a cast if needed
if (needUpdateDestType) {
rewriter.setInsertionPointAfter(packOp);
- auto castOp =
- rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
- rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+ if (hasTensorSemantics) {
+ auto castOp =
+ rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
+ rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+ } else {
+ auto castOp =
+ rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
+ rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+ }
}
return success();
}
@@ -5047,12 +5091,15 @@ bool PackOp::isLikePad() {
}
OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
+ if (!hasPureTensorSemantics())
+ return {};
+
std::optional<Attribute> paddingValue;
if (auto pad = adaptor.getPaddingValue())
paddingValue = pad;
if (OpFoldResult reshapedSource = reshapeConstantSource(
llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
- getDestType(), paddingValue))
+ cast<TensorType>(getDestType()), paddingValue))
return reshapedSource;
return {};
}
@@ -5324,9 +5371,11 @@ bool UnPackOp::isLikeUnPad() {
}
OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) {
+ if (!hasPureTensorSemantics())
+ return {};
if (OpFoldResult reshapedSource = reshapeConstantSource(
llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
- getResult().getType()))
+ cast<TensorType>(getResult().getType())))
return reshapedSource;
return {};
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 9f5000b70b6f6..22bd5a8b38862 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -808,7 +808,7 @@ bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
// If reassociation is not possible, then reordering cannot happen.
// This can be caused by pack padding affecting previously expanded
// dimensions or packing extending dimensions.
- RankedTensorType newPackType = linalg::PackOp::inferPackedType(
+ RankedTensorType newPackType = linalg::PackOp::inferPackedTensorType(
expandOp.getSrcType(), packOp.getStaticInnerTiles(),
projectedInnerDimsPos, /*outerDimsPerm=*/SmallVector<int64_t>{});
auto reassocExpand =
@@ -943,7 +943,7 @@ static LogicalResult pushDownUnPackOpThroughExpandShape(
nextPos += 1;
}
- RankedTensorType newExpandType = linalg::PackOp::inferPackedType(
+ RankedTensorType newExpandType = linalg::PackOp::inferPackedTensorType(
expandTy, innerTileSizes, projectedInnerDimsPos, newOuterDimsPerm);
auto newExpandOp = rewriter.create<tensor::ExpandShapeOp>(
expandOp.getLoc(), newExpandType, unPackOp.getSource(),
diff --git a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
index cd0cdd378c352..86a1fb12f2b26 100644
--- a/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
+++ b/mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp
@@ -315,11 +315,11 @@ SmallVector<Range> SliceFromCollapseHelper::getExtractSliceParams(
// have proven that these are not sliced. In this case we just take
// the full extent of each dimension in the reassociation list.
if (linearizedDimensions[it.index()]) {
- llvm::append_range(
- offsetsSizesAndStrides,
- llvm::map_range(it.value(), [&](int64_t idx) -> Range {
- return {zeroAttr, collapseShapeInputShape[idx], oneAttr};
- }));
+ llvm::append_range(offsetsSizesAndStrides,
+ llvm::map_range(it.value(), [&](int64_t idx) -> Range {
+ return {zeroAttr, collapseShapeInputShape[idx],
+ oneAttr};
+ }));
continue;
}
@@ -485,7 +485,7 @@ PackingMetadata mlir::computePackingMetadata(int64_t packedRank,
}
OpFoldResult mlir::reshapeConstantSource(DenseElementsAttr source,
- ShapedType result,
+ TensorType result,
std::optional<Attribute> cst) {
if (source && source.isSplat() && result.hasStaticShape() &&
(!cst.has_value() || source.getSplatValue<Attribute>() == cst.value()))
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 7ca20f684583a..550d717570e69 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -708,17 +708,15 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt:
// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>
// -----
-// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @pack_memref(%source: memref<128x256xf32>, %dest: memref<8x16x8x32xf32>) -> memref<8x16x8x32xf32> {
+func.func @pack_memref(%source: memref<128x256xf32>, %dest: memref<8x16x8x32xf32>) {
linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
- return %dest : memref<8x16x8x32xf32>
+ return
}
// -----
-// Test that we can lower all the way to LLVM without crashing, don't check results here.
-func.func @unpack_memref(%source: memref<16x8x8x32xf32>, %dest: memref<128x256xf32>) -> memref<128x256xf32> {
+func.func @unpack_memref(%source: memref<16x8x8x32xf32>, %dest: memref<128x256xf32>) {
linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
- return %dest : memref<128x256xf32>
+ return
}
>From 790e974e544fd8552cc668a621795f661b292247 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Sun, 30 Mar 2025 18:01:35 +0900
Subject: [PATCH 21/32] bail out transforms using PackOp, UnPackOp
---
.../Linalg/Transforms/BlockPackMatmul.cpp | 5 ++
.../Transforms/DataLayoutPropagation.cpp | 52 +++++++++++++++++++
.../Linalg/Transforms/Vectorization.cpp | 25 +++++++++
3 files changed, 82 insertions(+)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
index 81842e4bea631..0b3d86d51ca0a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
@@ -91,6 +91,11 @@ transposePackedMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
linalg::PackOp packOp, AffineMap operandMap,
ArrayRef<unsigned> blocksStartDimPos,
bool transposeOuterBlocks, bool transposeInnerBlocks) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
assert(operandMap.getNumDims() >= 4 &&
"expected at least 4D prepacked matmul");
assert(blocksStartDimPos.size() >= 2 &&
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 22bd5a8b38862..ced3719ff8c3e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -63,6 +63,12 @@ getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
OpTy packOrUnPackOp) {
static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (isa<linalg::LinalgOp>(packOrUnPackOp)) {
+ if (!packOrUnPackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+ }
LLVM_DEBUG(
{ llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; });
@@ -373,6 +379,11 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
static FailureOr<GenericOp>
bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, linalg::PackOp packOp,
const ControlPropagationFn &controlFn) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
auto genericOp = packOp.getSource().getDefiningOp<GenericOp>();
if (!genericOp)
return failure();
@@ -461,6 +472,11 @@ struct BubbleUpPackOpThroughGenericOpPattern
LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
auto genericOp =
bubbleUpPackOpThroughGenericOp(rewriter, packOp, controlFn);
if (failed(genericOp))
@@ -483,6 +499,11 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<linalg::PackOp> {
LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>();
if (!padOp)
return failure();
@@ -651,6 +672,11 @@ static LogicalResult
bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
linalg::PackOp packOp,
PatternRewriter &rewriter) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
SmallVector<int64_t> innerTileSizes = packOp.getStaticTiles();
ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm();
@@ -757,6 +783,11 @@ static LogicalResult
bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
linalg::PackOp packOp,
PatternRewriter &rewriter) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
// Outer dimensions permutation is not supported currently.
// TODO: Handle outer_dims_perm variants.
ArrayRef<int64_t> outerDimsPerm = packOp.getOuterDimsPerm();
@@ -840,6 +871,11 @@ class BubbleUpPackOpThroughReshapeOp final
LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
Operation *srcOp = packOp.getSource().getDefiningOp();
// Currently only support when the pack op is the only user.
if (!srcOp || !(srcOp->getNumResults() == 1) ||
@@ -893,6 +929,11 @@ class BubbleUpPackOpThroughReshapeOp final
static LogicalResult pushDownUnPackOpThroughExpandShape(
linalg::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp,
PatternRewriter &rewriter, ControlPropagationFn controlFn) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!unPackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
// User controlled propagation function.
if (!controlFn(&expandOp.getSrcMutable()))
return failure();
@@ -970,6 +1011,11 @@ class PushDownUnPackOpThroughReshapeOp final
LogicalResult matchAndRewrite(linalg::UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
+ // TODO(issues/129004): Support MemRef UnPackOp. Temporarily return failure.
+ if (!unPackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
Value result = unPackOp.getResult();
// Currently only support unpack op with the single user.
if (!result.hasOneUse()) {
@@ -1146,11 +1192,17 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> {
LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override {
+
linalg::UnPackOp unpackOp =
padOp.getSource().getDefiningOp<linalg::UnPackOp>();
+
if (!unpackOp)
return failure();
+ // TODO(issues/129004): Support MemRef PadOp. Temporarily return failure.
+ if (!unpackOp.hasPureTensorSemantics())
+ return failure();
+
if (!controlFn(&padOp.getSourceMutable()))
return failure();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index f716ff97f7cf3..aba729ec3f5cd 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1588,6 +1588,11 @@ static LogicalResult
vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
// TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(packOp);
@@ -1664,6 +1669,10 @@ static LogicalResult
vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!unpackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
// TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
@@ -1891,6 +1900,10 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op,
static LogicalResult
vectorizeUnPackOpPrecondition(linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!unpackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
if (llvm::any_of(unpackOp.getInnerTiles(), [](OpFoldResult res) {
return !getConstantIntValue(res).has_value();
@@ -2136,6 +2149,11 @@ static LogicalResult vectorizeLinalgOpPrecondition(
static LogicalResult
vectorizePackOpPrecondition(linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
auto padValue = packOp.getPaddingValue();
Attribute cstAttr;
if (padValue && !matchPattern(padValue, m_Constant(&cstAttr))) {
@@ -2358,6 +2376,13 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {
}
bool mlir::linalg::hasVectorizationImpl(Operation *op) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return false.
+ // Actually do we need this?
+ if (isa<linalg::PackOp, linalg::UnPackOp>(op)) {
+ if (!cast<LinalgOp>(op).hasPureTensorSemantics()) {
+ return false;
+ }
+ }
return isa<linalg::LinalgOp, tensor::PadOp, linalg::PackOp, linalg::UnPackOp,
tensor::InsertSliceOp>(op);
}
>From 820e40b994b9b26b92c7f184b2b9a01c1328d489 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Sun, 30 Mar 2025 19:23:21 +0900
Subject: [PATCH 22/32] fix build error
---
mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index ced3719ff8c3e..199011ac901ce 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -64,8 +64,9 @@ getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
// TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (isa<linalg::LinalgOp>(packOrUnPackOp)) {
- if (!packOrUnPackOp.hasPureTensorSemantics()) {
+ if (auto linalgOp =
+ dyn_cast<linalg::LinalgOp>(packOrUnPackOp.getOperation())) {
+ if (!linalgOp.hasPureTensorSemantics()) {
return failure();
}
}
>From 43a64b912adaa2eed85d9715c13c3057c2c4b53e Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Sun, 30 Mar 2025 20:38:34 +0900
Subject: [PATCH 23/32] fix build error
---
.../Dialect/Linalg/Transforms/Transforms.cpp | 25 +++++++++++++++++++
.../Linalg/Transforms/Vectorization.cpp | 7 ------
2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 75afcb1fec332..63c0e4d126c9a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -219,6 +219,11 @@ struct PackedOperandsDimList {
FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
linalg::PackOp packOp,
bool lowerPadLikeWithInsertSlice) {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
// 1. Filter out NYI cases.
auto packedTensorType =
cast<RankedTensorType>(packOp->getResultTypes().front());
@@ -355,6 +360,11 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
FailureOr<LowerUnPackOpResult>
linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
bool lowerUnpadLikeWithExtractSlice) {
+ // TODO(issues/129004): Support MemRef UnPackOp. Temporarily return failure.
+ if (!unPackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
Location loc = unPackOp->getLoc();
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unPackOp);
@@ -1032,6 +1042,11 @@ static Value getPackOpSourceOrPaddedSource(OpBuilder &builder,
return input;
}
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return packOp.getSource();
+ }
+
assert(llvm::all_of(packOp.getAllOuterDims(),
[](int64_t val) { return val == 1; }) &&
"some outer dims are != 1");
@@ -1144,6 +1159,11 @@ getPackUnpackRankReducedPerm(ArrayRef<int64_t> shape,
LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
linalg::PackOp packOp, PatternRewriter &rewriter) const {
+ // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
// TODO: support the case that outer dimensions are not all 1s. A
// tensor.expand_shape will be generated in this case.
if (llvm::any_of(packOp.getAllOuterDims(),
@@ -1245,6 +1265,11 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
LogicalResult DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite(
linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const {
+ // TODO(issues/129004): Support MemRef UnPackOp. Temporarily return failure.
+ if (!unpackOp.hasPureTensorSemantics()) {
+ return failure();
+ }
+
int64_t srcRank = unpackOp.getSourceRank();
int64_t destRank = unpackOp.getDestRank();
ArrayRef<int64_t> srcShape = unpackOp.getSourceType().getShape();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index aba729ec3f5cd..8936f9d9e389e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2376,13 +2376,6 @@ static void convertAffineApply(RewriterBase &rewriter, LinalgOp linalgOp) {
}
bool mlir::linalg::hasVectorizationImpl(Operation *op) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return false.
- // Actually do we need this?
- if (isa<linalg::PackOp, linalg::UnPackOp>(op)) {
- if (!cast<LinalgOp>(op).hasPureTensorSemantics()) {
- return false;
- }
- }
return isa<linalg::LinalgOp, tensor::PadOp, linalg::PackOp, linalg::UnPackOp,
tensor::InsertSliceOp>(op);
}
>From 486c62b7e91efca21f0aff37949095cac10b7895 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Wed, 2 Apr 2025 15:33:59 +0900
Subject: [PATCH 24/32] add invalid pack/unpack cases
---
mlir/test/Dialect/Linalg/invalid.mlir | 37 +++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index 90ceadebbc1fa..aa12778ffbf7f 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -1666,3 +1666,40 @@ func.func @unpack_static_inner_tile_size_and_dynamic_output_shape(
%0 = linalg.unpack %input inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %output : tensor<?x?x?x4xf32> -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
+
+// -----
+
+func.func @pack_source_dest_type_mismatch_1(%source: tensor<128x256xf32>, %dest: memref<8x16x8x32xf32>) {
+ // expected-error @+1 {{mixing tensor and buffer semantics is not allowed}}
+ linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : tensor<128x256xf32> -> memref<8x16x8x32xf32>
+ return
+}
+
+
+// -----
+
+func.func @pack_source_dest_type_mismatch_2(%source: memref<128x256xf32>, %dest: tensor<8x16x8x32xf32>) {
+ // expected-error @+1 {{mixing tensor and buffer semantics is not allowed}}
+ %0 = linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<128x256xf32> -> tensor<8x16x8x32xf32>
+ return
+}
+
+// -----
+
+func.func @unpack_source_dest_type_mismatch_1(%source: tensor<16x8x8x32xf32>, %dest: memref<128x256xf32>) {
+ // expected-error @+1 {{mixing tensor and buffer semantics is not allowed}}
+ linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : tensor<16x8x8x32xf32> -> memref<128x256xf32>
+ return
+}
+
+// -----
+
+func.func @unpack_source_dest_type_mismatch_2(%source: memref<16x8x8x32xf32>, %dest: tensor<128x256xf32>) {
+ // expected-error @+1 {{mixing tensor and buffer semantics is not allowed}}
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<16x8x8x32xf32> -> tensor<128x256xf32>
+ return
+}
>From ca889b5727d5808902e360fec2ee2c0586b2a879 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Wed, 2 Apr 2025 16:41:06 +0900
Subject: [PATCH 25/32] fix roundtrip test
---
mlir/test/Dialect/Linalg/roundtrip.mlir | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 550d717570e69..9c5141f56d575 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -708,15 +708,27 @@ func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt:
// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>
// -----
+
func.func @pack_memref(%source: memref<128x256xf32>, %dest: memref<8x16x8x32xf32>) {
linalg.pack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<128x256xf32> -> memref<8x16x8x32xf32>
return
}
+// CHECK-LABEL: func @pack_memref(
+// CHECK: %[[source:[a-zA-Z0-9]*]]: memref<128x256xf32>, %[[dest:[a-zA-Z0-9]*]]: memref<8x16x8x32xf32>) {
+// CHECK: %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %arg1 : memref<128x256xf32> -> memref<8x16x8x32xf32>
+// CHECK: return
+// CHECK: }
// -----
+
func.func @unpack_memref(%source: memref<16x8x8x32xf32>, %dest: memref<128x256xf32>) {
linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
into %dest : memref<16x8x8x32xf32> -> memref<128x256xf32>
return
}
+
+// CHECK-LABEL: func @unpack_memref(
+// CHECK: %[[source:[a-zA-Z0-9]*]]: memref<16x8x8x32xf32>, %[[dest:[a-zA-Z0-9]*]]: memref<128x256xf32>) {
+// CHECK: %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %arg1 : memref<16x8x8x32xf32> -> memref<128x256xf32>
+// CHECK: return
>From ce910b9c8158a4b752394801b531ea46458a3e3c Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Wed, 2 Apr 2025 17:11:17 +0900
Subject: [PATCH 26/32] fix upon review
---
.../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 2 +-
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 32 +++++++------
.../Linalg/Transforms/BlockPackMatmul.cpp | 5 +-
.../Transforms/DataLayoutPropagation.cpp | 46 +++++++------------
.../Dialect/Linalg/Transforms/Transforms.cpp | 26 +++++------
.../Linalg/Transforms/Vectorization.cpp | 23 ++++------
6 files changed, 60 insertions(+), 74 deletions(-)
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 6119097456d1f..bb2b474814824 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -78,7 +78,7 @@ struct OpenMPOpConversion : public ConvertOpToLLVMPattern<T> {
omp::FlushOp, omp::MapBoundsOp,
omp::ThreadprivateOp>::value) {
if (isa<MemRefType>(originalOperand.getType())) {
- // TODO: Support memref type in variable operands
+ // TODO: Support memref type in variable operands
return rewriter.notifyMatchFailure(op, "memref is not supported yet");
}
}
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 7e3a714b95bc8..711b48abcc0f4 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4431,8 +4431,7 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
// Verify that the source and destination are ranked types.
if (!packOrUnPack.getSourceType().hasRank() ||
!packOrUnPack.getDestType().hasRank()) {
- return op->emitError(
- "expected both source and destination to be shaped types");
+ return op->emitError("expected both source and destination to have rank");
}
// Verify tiles. Do not allow zero tiles.
@@ -5002,31 +5001,26 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
return success();
}
- // Insert either tensor.cast or memref.cast ops
- // if static shape inference is available..
+ // Insert tensor.cast if static shape inference is available.
bool hasTensorSemantics = packOp.hasPureTensorSemantics();
+ // TODO: support memref.cast if static shape inference is available.
SmallVector<int64_t> srcShape, destShape;
if (inferStaticShape(packOp, srcShape, destShape)) {
Location loc = packOp.getLoc();
Value source = packOp.getSource();
if (srcShape != packOp.getSourceType().getShape()) {
auto newSrcType = packOp.getSourceType().clone(srcShape);
- if (hasTensorSemantics)
- source = rewriter.create<tensor::CastOp>(loc, newSrcType,
- packOp.getSource());
- else
- source = rewriter.create<memref::CastOp>(loc, newSrcType,
- packOp.getSource());
+ source =
+ rewriter.create<tensor::CastOp>(loc, newSrcType, packOp.getSource());
}
Value dest = packOp.getDest();
ShapedType originalResultType = packOp.getDestType();
bool needUpdateDestType = (destShape != originalResultType.getShape());
if (needUpdateDestType) {
auto newDestType = packOp.getDestType().clone(destShape);
- if (hasTensorSemantics)
- dest =
- rewriter.create<tensor::CastOp>(loc, newDestType, packOp.getDest());
+ dest =
+ rewriter.create<tensor::CastOp>(loc, newDestType, packOp.getDest());
}
rewriter.modifyOpInPlace(packOp, [&] {
packOp.getSourceMutable().assign(source);
@@ -5036,6 +5030,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
// Insert a cast if needed
if (needUpdateDestType) {
rewriter.setInsertionPointAfter(packOp);
+ /// 1
if (hasTensorSemantics) {
auto castOp =
rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
@@ -5045,6 +5040,16 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
}
+ /// 2
+ Operation *castOp;
+ if (hasTensorSemantics) {
+ castOp =
+ rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
+ } else {
+ castOp =
+ rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
+ }
+ rewriter.replaceAllUsesExcept(packOp, castOp->getResult(0), castOp);
}
return success();
}
@@ -5126,6 +5131,7 @@ struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
if (!tensor::hasFoldableTensorCastOperand(op))
return failure();
+ // TODO: Support Memref PackOp. Temporarily return failure.
if (!op.hasPureTensorSemantics())
return failure();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
index 0b3d86d51ca0a..cdd9d3da9bcf8 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BlockPackMatmul.cpp
@@ -91,10 +91,9 @@ transposePackedMatmul(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
linalg::PackOp packOp, AffineMap operandMap,
ArrayRef<unsigned> blocksStartDimPos,
bool transposeOuterBlocks, bool transposeInnerBlocks) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
assert(operandMap.getNumDims() >= 4 &&
"expected at least 4D prepacked matmul");
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 199011ac901ce..54a11ad7c0b02 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -63,13 +63,16 @@ getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
OpTy packOrUnPackOp) {
static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (auto linalgOp =
- dyn_cast<linalg::LinalgOp>(packOrUnPackOp.getOperation())) {
- if (!linalgOp.hasPureTensorSemantics()) {
+ if (PackOp packOp = dyn_cast<PackOp>(packOrUnPackOp)) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
}
+
+ if (UnPackOp unpackOp = dyn_cast<UnPackOp>(packOrUnPackOp)) {
+ if (!unpackOp.hasPureTensorSemantics())
+ return failure();
+ }
+
LLVM_DEBUG(
{ llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; });
@@ -380,10 +383,8 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
static FailureOr<GenericOp>
bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, linalg::PackOp packOp,
const ControlPropagationFn &controlFn) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
auto genericOp = packOp.getSource().getDefiningOp<GenericOp>();
if (!genericOp)
@@ -473,10 +474,8 @@ struct BubbleUpPackOpThroughGenericOpPattern
LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
auto genericOp =
bubbleUpPackOpThroughGenericOp(rewriter, packOp, controlFn);
@@ -500,10 +499,8 @@ class BubbleUpPackThroughPadOp final : public OpRewritePattern<linalg::PackOp> {
LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
auto padOp = packOp.getSource().getDefiningOp<tensor::PadOp>();
if (!padOp)
@@ -673,10 +670,8 @@ static LogicalResult
bubbleUpPackOpThroughCollapseShape(tensor::CollapseShapeOp collapseOp,
linalg::PackOp packOp,
PatternRewriter &rewriter) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
SmallVector<int64_t> innerTileSizes = packOp.getStaticTiles();
ArrayRef<int64_t> innerDimsPos = packOp.getInnerDimsPos();
@@ -784,10 +779,8 @@ static LogicalResult
bubbleUpPackOpThroughExpandShape(tensor::ExpandShapeOp expandOp,
linalg::PackOp packOp,
PatternRewriter &rewriter) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
// Outer dimensions permutation is not supported currently.
// TODO: Handle outer_dims_perm variants.
@@ -872,10 +865,8 @@ class BubbleUpPackOpThroughReshapeOp final
LogicalResult matchAndRewrite(linalg::PackOp packOp,
PatternRewriter &rewriter) const override {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
Operation *srcOp = packOp.getSource().getDefiningOp();
// Currently only support when the pack op is the only user.
@@ -930,10 +921,8 @@ class BubbleUpPackOpThroughReshapeOp final
static LogicalResult pushDownUnPackOpThroughExpandShape(
linalg::UnPackOp unPackOp, tensor::ExpandShapeOp expandOp,
PatternRewriter &rewriter, ControlPropagationFn controlFn) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!unPackOp.hasPureTensorSemantics()) {
+ if (!unPackOp.hasPureTensorSemantics())
return failure();
- }
// User controlled propagation function.
if (!controlFn(&expandOp.getSrcMutable()))
@@ -1012,10 +1001,8 @@ class PushDownUnPackOpThroughReshapeOp final
LogicalResult matchAndRewrite(linalg::UnPackOp unPackOp,
PatternRewriter &rewriter) const override {
- // TODO(issues/129004): Support MemRef UnPackOp. Temporarily return failure.
- if (!unPackOp.hasPureTensorSemantics()) {
+ if (!unPackOp.hasPureTensorSemantics())
return failure();
- }
Value result = unPackOp.getResult();
// Currently only support unpack op with the single user.
@@ -1200,7 +1187,6 @@ struct PushDownUnPackThroughPadOp : public OpRewritePattern<tensor::PadOp> {
if (!unpackOp)
return failure();
- // TODO(issues/129004): Support MemRef PadOp. Temporarily return failure.
if (!unpackOp.hasPureTensorSemantics())
return failure();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 63c0e4d126c9a..49a2dbed14e75 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -219,10 +219,9 @@ struct PackedOperandsDimList {
FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
linalg::PackOp packOp,
bool lowerPadLikeWithInsertSlice) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
// 1. Filter out NYI cases.
auto packedTensorType =
@@ -360,7 +359,7 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
FailureOr<LowerUnPackOpResult>
linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
bool lowerUnpadLikeWithExtractSlice) {
- // TODO(issues/129004): Support MemRef UnPackOp. Temporarily return failure.
+ // TODO: Support Memref UnPackOp. Temporarily return failure.
if (!unPackOp.hasPureTensorSemantics()) {
return failure();
}
@@ -369,9 +368,10 @@ linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unPackOp);
- // TODO: support non-ranked tensor types. ShapedType
- RankedTensorType packedTensorType =
- dyn_cast<RankedTensorType>(unPackOp.getSourceType());
+ auto packedTensorType = dyn_cast<RankedTensorType>(unPackOp.getSourceType());
+ if (!packedTensorType)
+ return failure();
+
int64_t packedRank = packedTensorType.getRank();
OpFoldResult zero = rewriter.getIndexAttr(0), one = rewriter.getIndexAttr(1);
@@ -1042,10 +1042,9 @@ static Value getPackOpSourceOrPaddedSource(OpBuilder &builder,
return input;
}
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref PackOp. Temporarily return the source unchanged.
+ if (!packOp.hasPureTensorSemantics())
return packOp.getSource();
- }
assert(llvm::all_of(packOp.getAllOuterDims(),
[](int64_t val) { return val == 1; }) &&
@@ -1159,10 +1158,9 @@ getPackUnpackRankReducedPerm(ArrayRef<int64_t> shape,
LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
linalg::PackOp packOp, PatternRewriter &rewriter) const {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
// TODO: support the case that outer dimensions are not all 1s. A
// tensor.expand_shape will be generated in this case.
@@ -1265,7 +1263,7 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
LogicalResult DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite(
linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const {
- // TODO(issues/129004): Support MemRef UnPackOp. Temporarily return failure.
+ // TODO: Support Memref UnPackOp. Temporarily return failure.
if (!unpackOp.hasPureTensorSemantics()) {
return failure();
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 8936f9d9e389e..c3d2de697efb4 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1588,10 +1588,9 @@ static LogicalResult
vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
// TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
@@ -1669,18 +1668,17 @@ static LogicalResult
vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!unpackOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref UnPackOp. Temporarily return failure.
+ if (!unpackOp.hasPureTensorSemantics())
return failure();
- }
// TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unpackOp);
- // TODO: support non-ranked tensor types. ShapedType
- RankedTensorType unpackTensorType =
- dyn_cast<RankedTensorType>(unpackOp.getSourceType());
+ auto unpackTensorType = dyn_cast<RankedTensorType>(unpackOp.getSourceType());
+ if (!unpackTensorType)
+ return failure();
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
@@ -1900,7 +1898,7 @@ vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op,
static LogicalResult
vectorizeUnPackOpPrecondition(linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
+ // TODO: Support Memref UnPackOp. Temporarily return failure.
if (!unpackOp.hasPureTensorSemantics()) {
return failure();
}
@@ -2149,10 +2147,9 @@ static LogicalResult vectorizeLinalgOpPrecondition(
static LogicalResult
vectorizePackOpPrecondition(linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes) {
- // TODO(issues/129004): Support MemRef PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics()) {
+ // TODO: Support Memref PackOp. Temporarily return failure.
+ if (!packOp.hasPureTensorSemantics())
return failure();
- }
auto padValue = packOp.getPaddingValue();
Attribute cstAttr;
>From 6a501bdffa959eb1bf95a09c23b4699798879c9f Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Wed, 2 Apr 2025 18:49:02 +0900
Subject: [PATCH 27/32] fix upon review
---
.../Dialect/Linalg/IR/LinalgRelayoutOps.td | 7 ++++
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 37 ++++++++-----------
.../Transforms/DataLayoutPropagation.cpp | 9 ++---
3 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
index bd9caa3f6b1a7..b224b402ff8d6 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td
@@ -201,6 +201,13 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
ArrayRef<int64_t> outerDimsPerm = {});
+ // Method to get the Shape of the result based on the input shape, inner
+ // tiles, position of the inner tiles (innerDimsPos) and interchange vector
+ // of outer loops (outerDimsPerm).
+ static SmallVector<int64_t> inferPackedShape(ArrayRef<int64_t> inputShape,
+ ArrayRef<int64_t> innerTileSizes, ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm = {});
+
// Returns true if we have enough static information to catch undefined
// behavior when the tile size does not divide perfectly the dimension of
// the input tensor. Detecting UB requires that the input size and either
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 711b48abcc0f4..f285a5093a80b 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4487,17 +4487,13 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
// Verify result shape is greater than the minimum expected
// by the pack operation, and that the output shape
// represents full tiles.
- if (hasTensorSemantics) {
- RankedTensorType expectedPackedType = PackOp::inferPackedTensorType(
- cast<RankedTensorType>(unpackedType), packOrUnPack.getStaticTiles(),
- innerDimsPos, outerDimPerm);
- if (!areAllInBound(expectedPackedType.getShape(), packedType.getShape())) {
- return op->emitError(
- "the shape of output is not large enough to hold the "
- "packed data. Expected at least ")
- << expectedPackedType << ", got " << packedType;
- }
- } else {
+ auto expectedPackedShape = PackOp::inferPackedShape(
+ unpackedType.getShape(), packOrUnPack.getStaticTiles(),
+ packOrUnPack.getInnerDimsPos(), packOrUnPack.getOuterDimsPerm());
+ if (!areAllInBound(expectedPackedShape, packedType.getShape())) {
+ return op->emitError("the shape of output is not large enough to hold the "
+ "packed data. Expected at least ")
+ << expectedPackedShape << ", got " << packedType.getShape();
}
if (!llvm::all_of(
llvm::zip(packedType.getShape().take_back(mixedTiles.size()),
@@ -4784,6 +4780,14 @@ MemRefType PackOp::inferPackedMemRefType(MemRefType sourceType,
return MemRefType::get(resultShape, sourceType.getElementType());
}
+SmallVector<int64_t> PackOp::inferPackedShape(ArrayRef<int64_t> inputShape,
+ ArrayRef<int64_t> innerTileSizes,
+ ArrayRef<int64_t> innerDimsPos,
+ ArrayRef<int64_t> outerDimsPerm) {
+ return getPackOpResultTypeShape(inputShape, innerTileSizes, innerDimsPos,
+ outerDimsPerm);
+}
+
Value PackOp::createDestinationTensor(OpBuilder &b, Location loc, Value source,
ArrayRef<OpFoldResult> innerTileSizes,
ArrayRef<int64_t> innerDimsPos,
@@ -5030,7 +5034,6 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
// Insert a cast if needed
if (needUpdateDestType) {
rewriter.setInsertionPointAfter(packOp);
- /// 1
if (hasTensorSemantics) {
auto castOp =
rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
@@ -5040,16 +5043,6 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
}
- /// 2
- Operation *castOp;
- if (hasTensorSemantics) {
- castOp =
- rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
- } else {
- castOp =
- rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
- }
- rewriter.replaceAllUsesExcept(packOp, castOp->getResult(0), castOp);
}
return success();
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 54a11ad7c0b02..7891067323165 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -63,13 +63,12 @@ getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
OpTy packOrUnPackOp) {
static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
- if (PackOp packOp = dyn_cast<PackOp>(packOrUnPackOp)) {
- if (!packOp.hasPureTensorSemantics())
+ if constexpr (std::is_same_v<OpTy, linalg::PackOp>) {
+ if (!packOrUnPackOp.hasPureTensorSemantics())
return failure();
}
-
- if (UnPackOp unpackOp = dyn_cast<UnPackOp>(packOrUnPackOp)) {
- if (!unpackOp.hasPureTensorSemantics())
+ if constexpr (std::is_same_v<OpTy, linalg::UnPackOp>) {
+ if (!packOrUnPackOp.hasPureTensorSemantics())
return failure();
}
>From 535e796e458cd8c3f82e43177beb7ef1f507d918 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <hyunsungl at nvidia.com>
Date: Wed, 2 Apr 2025 20:04:27 +0900
Subject: [PATCH 28/32] .
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 1 -
.../Linalg/Transforms/DataLayoutPropagation.cpp | 10 ++--------
2 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index f285a5093a80b..ea7ea0694e7a3 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -4444,7 +4444,6 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
!packOrUnPack.hasPureTensorSemantics()) {
return op->emitError("mixing tensor and buffer semantics is not allowed");
}
- bool hasTensorSemantics = packOrUnPack.hasPureTensorSemantics();
// Verify inner_dims_pos and outer_dims_perm.
ShapedType unpackedType = (std::is_same<OpTy, PackOp>::value)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 7891067323165..9a5c792aea852 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -63,14 +63,8 @@ getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
OpTy packOrUnPackOp) {
static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
- if constexpr (std::is_same_v<OpTy, linalg::PackOp>) {
- if (!packOrUnPackOp.hasPureTensorSemantics())
- return failure();
- }
- if constexpr (std::is_same_v<OpTy, linalg::UnPackOp>) {
- if (!packOrUnPackOp.hasPureTensorSemantics())
- return failure();
- }
+ if (!packOrUnPackOp.hasPureTensorSemantics())
+ return failure();
LLVM_DEBUG(
{ llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; });
>From 865d90c373fe914ec60bb4ad5990b3527db3aae3 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Sun, 13 Apr 2025 12:32:44 +0900
Subject: [PATCH 29/32] fix upon review
---
.../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp | 4 +-
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 33 +-
.../Transforms/DataLayoutPropagation.cpp | 6 -
.../Dialect/Linalg/Transforms/Transforms.cpp | 23 +-
.../Linalg/Transforms/Vectorization.cpp | 12 +-
mlir/test/Dialect/Linalg/canonicalize.mlir | 166 +++++----
mlir/test/Dialect/Linalg/invalid.mlir | 340 +++++++++---------
7 files changed, 301 insertions(+), 283 deletions(-)
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index bb2b474814824..ad9621257f5df 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -77,10 +77,8 @@ struct OpenMPOpConversion : public ConvertOpToLLVMPattern<T> {
if constexpr (llvm::is_one_of<T, omp::AtomicUpdateOp, omp::AtomicWriteOp,
omp::FlushOp, omp::MapBoundsOp,
omp::ThreadprivateOp>::value) {
- if (isa<MemRefType>(originalOperand.getType())) {
- // TODO: Support Memref PackOp. Temporarily return failure.
+ if (isa<MemRefType>(originalOperand.getType()))
return rewriter.notifyMatchFailure(op, "memref is not supported yet");
- }
}
convertedOperands.push_back(convertedOperand);
}
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index ea7ea0694e7a3..f01e2f96e19d6 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Linalg/IR/Linalg.h"
-#include <iostream>
#include "mlir/AsmParser/AsmParser.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -4486,13 +4485,23 @@ static LogicalResult commonVerifierPackAndUnPackOp(OpTy packOrUnPack) {
// Verify result shape is greater than the minimum expected
// by the pack operation, and that the output shape
// represents full tiles.
- auto expectedPackedShape = PackOp::inferPackedShape(
+ SmallVector<int64_t> expectedPackedShape = PackOp::inferPackedShape(
unpackedType.getShape(), packOrUnPack.getStaticTiles(),
packOrUnPack.getInnerDimsPos(), packOrUnPack.getOuterDimsPerm());
+
if (!areAllInBound(expectedPackedShape, packedType.getShape())) {
+ auto elementType = unpackedType.getElementType();
+ Type expectedType, actualType;
+ if (packOrUnPack.hasPureTensorSemantics()) {
+ expectedType = RankedTensorType::get(expectedPackedShape, elementType);
+ actualType = RankedTensorType::get(packedType.getShape(), elementType);
+ } else {
+ expectedType = MemRefType::get(expectedPackedShape, elementType);
+ actualType = MemRefType::get(packedType.getShape(), elementType);
+ }
return op->emitError("the shape of output is not large enough to hold the "
"packed data. Expected at least ")
- << expectedPackedShape << ", got " << packedType.getShape();
+ << expectedType << ", got " << actualType;
}
if (!llvm::all_of(
llvm::zip(packedType.getShape().take_back(mixedTiles.size()),
@@ -5033,15 +5042,24 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
// Insert a cast if needed
if (needUpdateDestType) {
rewriter.setInsertionPointAfter(packOp);
+ // if (hasTensorSemantics) {
+ // auto castOp =
+ // rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
+ // rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+ // } else {
+ // auto castOp =
+ // rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
+ // rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+ // }
+ Operation *castOp;
if (hasTensorSemantics) {
- auto castOp =
+ castOp =
rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
- rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
} else {
- auto castOp =
+ castOp =
rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
- rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
}
+ rewriter.replaceAllUsesExcept(packOp, castOp->getResult(0), castOp);
}
return success();
}
@@ -5423,6 +5441,7 @@ struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> {
if (!tensor::hasFoldableTensorCastOperand(op))
return failure();
+ // TODO: Support MemRef UnPackOp. Temporarily return failure.
if (!op.hasPureTensorSemantics())
return failure();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
index 9a5c792aea852..5f38f5a84ac64 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp
@@ -63,9 +63,6 @@ getPackingInfoFromOperand(OpOperand *opOperand, linalg::GenericOp genericOp,
OpTy packOrUnPackOp) {
static_assert(llvm::is_one_of<OpTy, linalg::PackOp, linalg::UnPackOp>::value,
"applies to only pack or unpack operations");
- if (!packOrUnPackOp.hasPureTensorSemantics())
- return failure();
-
LLVM_DEBUG(
{ llvm::dbgs() << "--- Construct PackInfo From an operand ---\n"; });
@@ -376,9 +373,6 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp,
static FailureOr<GenericOp>
bubbleUpPackOpThroughGenericOp(RewriterBase &rewriter, linalg::PackOp packOp,
const ControlPropagationFn &controlFn) {
- if (!packOp.hasPureTensorSemantics())
- return failure();
-
auto genericOp = packOp.getSource().getDefiningOp<GenericOp>();
if (!genericOp)
return failure();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 49a2dbed14e75..c8a930aec60cd 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -219,7 +219,6 @@ struct PackedOperandsDimList {
FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
linalg::PackOp packOp,
bool lowerPadLikeWithInsertSlice) {
- // TODO: Support Memref PackOp. Temporarily return failure.
if (!packOp.hasPureTensorSemantics())
return failure();
@@ -359,19 +358,14 @@ FailureOr<LowerPackResult> linalg::lowerPack(RewriterBase &rewriter,
FailureOr<LowerUnPackOpResult>
linalg::lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp,
bool lowerUnpadLikeWithExtractSlice) {
- // TODO: Support Memref PackOp. Temporarily return failure.
- if (!unPackOp.hasPureTensorSemantics()) {
+ if (!unPackOp.hasPureTensorSemantics())
return failure();
- }
Location loc = unPackOp->getLoc();
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unPackOp);
- auto packedTensorType = dyn_cast<RankedTensorType>(unPackOp.getSourceType());
- if (!packedTensorType)
- return failure();
-
+ auto packedTensorType = cast<RankedTensorType>(unPackOp.getSourceType());
int64_t packedRank = packedTensorType.getRank();
OpFoldResult zero = rewriter.getIndexAttr(0), one = rewriter.getIndexAttr(1);
@@ -1038,14 +1032,14 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
static Value getPackOpSourceOrPaddedSource(OpBuilder &builder,
linalg::PackOp packOp) {
Value input = packOp.getSource();
+ // TODO: Support MemRef PackOp. Temporarily return the op's source unchanged.
+ if (!packOp.hasPureTensorSemantics())
+ return input;
+
if (!packOp.getPaddingValue()) {
return input;
}
- // TODO: Support Memref PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics())
- return packOp.getSource();
-
assert(llvm::all_of(packOp.getAllOuterDims(),
[](int64_t val) { return val == 1; }) &&
"some outer dims are != 1");
@@ -1158,7 +1152,6 @@ getPackUnpackRankReducedPerm(ArrayRef<int64_t> shape,
LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
linalg::PackOp packOp, PatternRewriter &rewriter) const {
- // TODO: Support Memref PackOp. Temporarily return failure.
if (!packOp.hasPureTensorSemantics())
return failure();
@@ -1263,10 +1256,8 @@ LogicalResult DecomposeOuterUnitDimsPackOpPattern::matchAndRewrite(
LogicalResult DecomposeOuterUnitDimsUnPackOpPattern::matchAndRewrite(
linalg::UnPackOp unpackOp, PatternRewriter &rewriter) const {
- // TODO: Support Memref PackOp. Temporarily return failure.
- if (!unpackOp.hasPureTensorSemantics()) {
+ if (!unpackOp.hasPureTensorSemantics())
return failure();
- }
int64_t srcRank = unpackOp.getSourceRank();
int64_t destRank = unpackOp.getDestRank();
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 89a61762e84f3..3a11921c50581 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1588,10 +1588,6 @@ static LogicalResult
vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
- // TODO: Support Memref PackOp. Temporarily return failure.
- if (!packOp.hasPureTensorSemantics())
- return failure();
-
// TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(packOp);
@@ -1668,17 +1664,11 @@ static LogicalResult
vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes,
SmallVectorImpl<Value> &newResults) {
- // TODO: Support Memref PackOp. Temporarily return failure.
- if (!unpackOp.hasPureTensorSemantics())
- return failure();
-
// TODO: Introduce a parent class that will handle the insertion point update.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(unpackOp);
- auto unpackTensorType = dyn_cast<RankedTensorType>(unpackOp.getSourceType());
- if (!unpackTensorType)
- return failure();
+ auto unpackTensorType = cast<RankedTensorType>(unpackOp.getSourceType());
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 86cb8f58abe02..eafbb99caecaa 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -1,30 +1,42 @@
-// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file |
+// FileCheck %s
// CHECK-LABEL: func @memref_cast(
-func.func @memref_cast(%a: index, %b: index) -> memref<?x?xf32> {
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %c8 = arith.constant 8 : index
- %c16 = arith.constant 16 : index
- %1 = memref.alloc (%b) : memref<?xi8>
- %2 = memref.view %1[%c0][] : memref<?xi8> to memref<16x16xf32>
- %3 = memref.cast %2 : memref<16x16xf32> to memref<?x?xf32>
-
- // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>)
- linalg.matmul ins(%3, %3: memref<?x?xf32>, memref<?x?xf32>)
- outs(%3: memref<?x?xf32>)
- return %3: memref<?x?xf32>
-}
+func.func @memref_cast(% a : index, % b : index)->memref < ? x ? xf32>{
+ % c0 = arith.constant 0 : index %
+ c1 = arith.constant 1 : index %
+ c8 = arith.constant 8 : index %
+ c16 = arith.constant 16 : index %
+ 1 = memref.alloc(% b) : memref <
+ ? xi8 > % 2 = memref.view % 1 [% c0][]
+ : memref < ? xi8 > to memref<16x16xf32> %
+ 3 = memref.cast % 2
+ : memref<16x16xf32> to memref <
+ ? x
+ ? xf32 >
+
+ // CHECK: linalg.matmul
+ // ins({{.*}}memref<16x16xf32>,
+ // memref<16x16xf32>)
+ // outs({{.*}}memref<16x16xf32>)
+ linalg.matmul ins(
+ % 3, % 3 : memref < ? x ? xf32 >,
+ memref < ? x
+ ? xf32 >)
+ outs(% 3 : memref <
+ ? x ? xf32 >) return % 3
+ : memref <
+ ? x
+ ? xf32 > }
// -----
#accesses = [
- affine_map<(i) -> (i)>
-]
+ affine_map<(i)->(i)>]
#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
+ indexing_maps = #accesses,
+ iterator_types = ["parallel"]
}
func.func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf32> {
@@ -117,7 +129,7 @@ func.func @linalg_effects(
// -----
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
-> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
%c0 = arith.constant 0 : index
@@ -144,7 +156,7 @@ func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
// -----
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
-> tensor<1x2x3xf32> {
%out = tensor.empty() : tensor<1x2x3xf32>
@@ -160,12 +172,12 @@ func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
}
// CHECK-LABEL: func @remove_no_op_mismatched_types
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-// CHECK: %[[CAST:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<1x2x3xf32>
-// CHECK: return %[[CAST]]
+// CHECK: %[[CAST:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to
+// tensor<1x2x3xf32> CHECK: return %[[CAST]]
// -----
-#map = affine_map<() -> ()>
+#map = affine_map < ()->()>
func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
%out = tensor.empty() : tensor<f32>
%g = linalg.generic {
@@ -183,7 +195,7 @@ func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
// -----
-#map = affine_map<(d0, d1) -> (d0, d1)>
+#map = affine_map < (d0, d1)->(d0, d1)>
func.func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -208,7 +220,7 @@ func.func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
// -----
-#map = affine_map<(d0, d1) -> (d0, d1)>
+#map = affine_map < (d0, d1)->(d0, d1)>
func.func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
-> (tensor<?x?xf32>, tensor<?x?xf32>) {
%c0 = arith.constant 0 : index
@@ -386,7 +398,7 @@ func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{
// -----
-#map = affine_map<()[s0] -> (s0 ceildiv 16)>
+#map = affine_map < ()[s0]->(s0 ceildiv 16)>
func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
@@ -495,11 +507,15 @@ func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
// -----
-// Tests below verify whether static information is propagated through all the operands of generic op.
-// 1. If one of the inputs of generic op has static info and it has no cast source.
-// 2. If one of the inputs of generic op has static info and it is coming from tensr.cast operation.
-// 3. If one of the outputs of generic op has static info and it is coming from tenso.cast operation.
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+// Tests below verify whether static information is propagated through all the
+// operands of generic op.
+// 1. If one of the inputs of generic op has static info and it has no cast
+// source.
+// 2. If one of the inputs of generic op has static info and it is coming from
+// tensor.cast operation.
+// 3. If one of the outputs of generic op has static info and it is coming from
+// tensor.cast operation.
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
// CHECK-LABEL: func @static_input_without_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
@@ -529,7 +545,7 @@ func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x
// -----
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
// CHECK-LABEL: func @static_input_with_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
@@ -560,7 +576,7 @@ func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?
// -----
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
// CHECK-LABEL: func @static_output_with_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>, %[[ARG2:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
@@ -592,9 +608,9 @@ func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x
// -----
-// This test checks the folding of tensor.cast operation when the source value of cast
-// has more static information than the destination value.
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+// This test checks the folding of tensor.cast operation when the source value
+// of cast has more static information than the destination value.
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
// CHECK-LABEL: func @cast_source
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
@@ -625,7 +641,7 @@ func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> t
// -----
-#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
// CHECK-LABEL: func @cast_dest
// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
@@ -649,8 +665,9 @@ func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2:
// -----
-#map = affine_map<(d0, d1) -> (d0, d1)>
-#sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
+#map = affine_map < (d0, d1)->(d0, d1)>
+#sparse = #sparse_tensor.encoding < \
+ {map = (d0, d1)->(d0 : dense, d1 : compressed) }>
// CHECK-DAG: #[[$SPARSE:.+]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
// CHECK-LABEL: func @static_shape_inference_with_encoding(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
@@ -839,23 +856,25 @@ func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor<?x?xf32>, %arg1 : ten
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
scf.if %arg3 {
- %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x8xf32>
- func.call @some_use(%1) : (tensor<4x8xf32>) -> ()
+ % 1 = tensor.cast % 0 : tensor < ? x ? xf32 > to tensor<4x8xf32> func.call
+ @some_use(% 1)
+ : (tensor<4x8xf32>)->()
}
- return %0 : tensor<?x?xf32>
+ return % 0 : tensor < ? x ? xf32 >
}
// Check conditionally reachable cast is not folded into producer.
// CHECK-LABEL: func @linalgop_with_cond_cast_consumer
-// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>, %[[ARG2:.*]]: tensor<?x?xf32>, %[[ARG3:.*]]: i1)
-// CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]:
+// tensor<?x?xf32>, %[[ARG2:.*]]: tensor<?x?xf32>, %[[ARG3:.*]]: i1)
+// CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] :
+// tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: scf.if %[[ARG3]] {
-// CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor<?x?xf32> to tensor<4x8xf32>
-// CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> ()
-// CHECK: }
-// CHECK: return %[[RES]] : tensor<?x?xf32>
-
+// CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor<?x?xf32> to
+// tensor<4x8xf32> CHECK: func.call @some_use(%[[CAST]]) :
+// (tensor<4x8xf32>) -> () CHECK: } CHECK: return %[[RES]] :
+// tensor<?x?xf32>
// -----
@@ -904,17 +923,19 @@ func.func @fold_multi_use_generic_op_with_consumer(%arg0 : tensor<?x?x?xf32>) ->
// CHECK: func @fold_multi_use_generic_op_with_consumer
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<2x3x4xf32>
-// CHECK-DAG: %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<4x3x2xf32>
-// CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
+// CHECK-DAG: %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to
+// tensor<4x3x2xf32> CHECK-DAG: %[[INIT2:.+]] = tensor.empty() :
+// tensor<3x2x4xf32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: ins(%[[CAST]] :
// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] :
-// CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor<?x?x?xf32>
-// CHECK: return %[[RETURN_CAST]], %[[GENERIC]]#1
+// CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 :
+// tensor<3x2x4xf32> to tensor<?x?x?xf32> CHECK: return
+// %[[RETURN_CAST]], %[[GENERIC]]#1
// -----
-#map = affine_map<(d0) -> (d0)>
+#map = affine_map < (d0)->(d0)>
func.func @identity_buffer(%arg0 : memref<?xf32>, %arg1: memref<?xf32>) {
linalg.generic {
indexing_maps = [#map, #map],
@@ -938,7 +959,7 @@ func.func @identity_buffer(%arg0 : memref<?xf32>, %arg1: memref<?xf32>) {
// -----
-#map = affine_map<(d0, d1) -> (d1, d0)>
+#map = affine_map < (d0, d1)->(d1, d0)>
func.func @erase_non_identity_noop(%arg0 : tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic {
indexing_maps = [#map, #map],
@@ -1722,6 +1743,21 @@ func.func @infer_and_fold_pack_unpack_same_tiles(%t: tensor<10x20x4x4xf32>) -> t
// -----
+func.func @infer_and_fold_pack_unpack_same_tiles_memref(%t: memref<10x20x4x4xf32>) -> memref<10x20x4x4xf32> {
+ %c40 = arith.constant 40 : index
+ %c80 = arith.constant 80 : index
+ %buf_unpacked = memref.alloc() : memref<40x80xf32>
+ %unpacked = linalg.unpack %t inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %buf_unpacked : memref<10x20x4x4xf32> -> memref<40x80xf32>
+ %buf_packed = memref.alloc() : memref<10x20x4x4xf32>
+ %packed = linalg.pack %unpacked inner_dims_pos = [0, 1] inner_tiles = [4, 4] into %buf_packed : memref<40x80xf32> -> memref<10x20x4x4xf32>
+ return %packed : memref<10x20x4x4xf32>
+}
+// CHECK-LABEL: func.func @infer_and_fold_pack_unpack_same_tiles_memref
+// CHECK-SAME: %[[SRC:[0-9a-zA-Z]+]]
+// CHECK: return %[[SRC]]
+
+// -----
+
// CHECK-LABEL: func.func @pack_dont_drop_attributes(
// CHECK: linalg.pack {{.*}} {test_attr}
func.func @pack_dont_drop_attributes(%arg0: tensor<?x?x?xf16>, %arg1: tensor<128x?x100x16x1xf16>) -> tensor<128x?x100x16x1xf16> {
@@ -1830,14 +1866,16 @@ func.func @no_fold_extract_slice_into_unpack_rank_reducing(
func.func @no_fold_extract_slice_into_unpack_non_zero_offset(
%src : tensor<28x2x16xf32>, %dest : tensor<28x32xf32>
) -> tensor<28x28xf32> {
- %unpack = linalg.unpack %src
- outer_dims_perm = [0, 1]
- inner_dims_pos = [1]
- inner_tiles = [16]
- into %dest : tensor<28x2x16xf32> -> tensor<28x32xf32>
- %extracted_slice = tensor.extract_slice %unpack
- [0, 1] [28, 28] [1, 1] : tensor<28x32xf32> to tensor<28x28xf32>
- return %extracted_slice : tensor<28x28xf32>
+ % unpack =
+ linalg.unpack % src outer_dims_perm =
+ [ 0, 1 ] inner_dims_pos = [1] inner_tiles =
+ [16] into % dest : tensor<28x2x16xf32>->tensor<28x32xf32> %
+ extracted_slice =
+ tensor.extract_slice %
+ unpack[0, 1][28, 28][1, 1] : tensor<28x32xf32> to
+ tensor<28x28xf32> return %
+ extracted_slice
+ : tensor<28x28xf32>
}
// CHECK-LABEL: func @no_fold_extract_slice_into_unpack_non_zero_offset
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index aa12778ffbf7f..8177f1ee98584 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -32,90 +32,74 @@ func.func @index_parent() {
// -----
func.func @index_dim_lower_than_number_of_loops(%arg0: memref<f32>) {
- // expected-error @+6 {{op expected dim (2) to be lower than the number of loops (0) of the enclosing LinalgOp}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()> ],
- iterator_types = []}
- outs(%arg0 : memref<f32>) {
- ^bb(%0: f32):
- linalg.index 2 : index
- linalg.yield %0 : f32
+ // expected-error @+6 {{op expected dim (2) to be lower than the number of
+ // loops (0) of the enclosing LinalgOp}}
+ linalg.generic{indexing_maps = [affine_map<()->()>],
+ iterator_types = []} outs(% arg0 : memref<f32>) {
+ ^bb(% 0 : f32) : linalg.index 2 : index linalg.yield % 0 : f32
}
}
// -----
-func.func @index_dim_negative(%arg0: memref<f32>) {
- // expected-error @+6 {{op attribute 'dim' failed to satisfy constraint: 64-bit signless integer attribute whose minimum value is 0}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()> ],
- iterator_types = []}
- outs(%arg0 : memref<f32>) {
- ^bb(%0: f32):
- linalg.index -1 : index
- linalg.yield %0 : f32
+func.func @index_dim_negative(% arg0 : memref<f32>) {
+ // expected-error @+6 {{op attribute 'dim' failed to satisfy constraint:
+ // 64-bit signless integer attribute whose minimum value is 0}}
+ linalg.generic{indexing_maps = [affine_map<()->()>],
+ iterator_types = []} outs(% arg0 : memref<f32>) {
+ ^bb(% 0 : f32) : linalg.index - 1 : index linalg.yield % 0 : f32
}
}
// -----
-func.func @generic_no_region(%arg0: memref<f32>) {
- // expected-error @+4 {{expected '{' to begin a region}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> (0)> ],
- iterator_types = []
- } ins(%arg0 : memref<f32>)
-}
+func.func @generic_no_region(% arg0 : memref<f32>){
+ // expected-error @+4 {{expected '{' to begin a region}}
+ linalg.generic{indexing_maps = [affine_map<()->(0)>],
+ iterator_types = []} ins(% arg0 : memref<f32>)}
// -----
-func.func @generic_mismatched_num_returns(%arg0: memref<f32>) {
- // expected-error @+6 {{op expected number of yield values (0) to match the number of inits / outs operands of the enclosing LinalgOp (1)}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()> ],
- iterator_types = []}
- outs(%arg0 : memref<f32>) {
- ^bb(%0: f32):
- linalg.yield
+func.func @generic_mismatched_num_returns(% arg0 : memref<f32>) {
+ // expected-error @+6 {{op expected number of yield values (0) to match the
+ // number of inits / outs operands of the enclosing LinalgOp (1)}}
+ linalg.generic{indexing_maps = [affine_map<()->()>],
+ iterator_types = []} outs(% arg0 : memref<f32>) {
+ ^bb(% 0 : f32) : linalg.yield
}
}
// -----
-func.func @generic_wrong_dim_in_map(%arg0: memref<1xi32>) {
- // expected-error @+1 {{op expected indexing_map #0 to have 1 dim(s) to match the number of loops}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> (0)> ],
- iterator_types = ["parallel"]}
- outs(%arg0 : memref<1xi32>) {
- ^bb(%i : i32):
- linalg.yield %i : i32
+func.func @generic_wrong_dim_in_map(% arg0 : memref<1xi32>) {
+ // expected-error @+1 {{op expected indexing_map #0 to have 1 dim(s) to match
+ // the number of loops}}
+ linalg.generic{indexing_maps = [affine_map<()->(0)>],
+ iterator_types = ["parallel"]} outs(% arg0 : memref<1xi32>) {
+ ^bb(% i : i32) : linalg.yield % i : i32
}
}
// -----
-func.func @generic_wrong_iterator(%arg0: memref<1xi32>) {
+func.func @generic_wrong_iterator(% arg0 : memref<1xi32>) {
// expected-error @+4 {{unexpected iterator_type (random)}}
- linalg.generic {
- indexing_maps = [ affine_map<(i) -> (i)> ],
- iterator_types = ["random"]}
- outs(%arg0 : memref<1xi32>) {
- ^bb(%i : i32):
- linalg.yield %i : i32
+ linalg.generic{indexing_maps = [affine_map<(i)->(i)>],
+ iterator_types = ["random"]} outs(% arg0 : memref<1xi32>) {
+ ^bb(% i : i32) : linalg.yield % i : i32
}
}
// -----
func.func @generic_one_d_view(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- // expected-error @+1 {{expected operand rank (1) to match the result rank of indexing_map #0 (2)}}
+ // expected-error @+1 {{expected operand rank (1) to match the result rank of
+ // indexing_map #0 (2)}}
linalg.generic {
indexing_maps = [ affine_map<() -> (0, 0)> ],
iterator_types = []}
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(%f : f32):
- linalg.yield %f: f32
+ ^bb(% f : f32) : linalg.yield % f : f32
}
}
@@ -129,22 +113,20 @@ func.func @generic_scalar_view(%arg0: memref<?xf32, affine_map<(i)[off]->(off +
iterator_types = []}
ins(%cst : f32)
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(%0 : f32, %1 : f32):
- linalg.yield %0: f32
+ ^bb(% 0 : f32, % 1 : f32) : linalg.yield % 0 : f32
}
}
// -----
func.func @generic_result_0_element_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- // expected-error @+7 {{'linalg.yield' op type of yield operand 1 ('i4') doesn't match the element type of the enclosing linalg.generic op ('f32')}}
+ // expected-error @+7 {{'linalg.yield' op type of yield operand 1 ('i4')
+ // doesn't match the element type of the enclosing linalg.generic op ('f32')}}
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(%0: f32):
- %1 = arith.constant 1: i4
- linalg.yield %1: i4
+ ^bb(% 0 : f32) : % 1 = arith.constant 1 : i4 linalg.yield % 1 : i4
}
}
@@ -160,8 +142,7 @@ func.func @generic_singular_maps(%arg0: memref<?xf32, affine_map<(i)[off]->(off
iterator_types = ["parallel","parallel"]}
ins(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>)
outs(%arg1 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(%0: f32, %1: f32):
- linalg.yield %1: f32
+ ^bb(% 0 : f32, % 1 : f32) : linalg.yield % 1 : f32
}
}
@@ -171,57 +152,53 @@ func.func @generic_singular_maps(%arg0: memref<?xf32, affine_map<(i)[off]->(off
// -----
-func.func @generic_empty_region(%arg0: memref<f32>) {
- %f0 = arith.constant 0.0: f32
- // expected-error @+1 {{op expects region #0 to have 0 or 1 blocks}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
- iterator_types = []}
- ins(%arg0 : memref<f32>)
- outs(%arg0 : memref<f32>) {
- ^bb1:
- linalg.yield %f0: f32
- ^bb2:
- linalg.yield %f0: f32
+func.func @generic_empty_region(% arg0 : memref<f32>) {
+ % f0 = arith
+ .constant 0.0
+ : f32
+ // expected-error @+1 {{op expects region #0 to have 0 or 1 blocks}}
+ linalg.generic{indexing_maps =
+ [ affine_map<()->()>, affine_map<()->()> ],
+ iterator_types = []} ins(% arg0 : memref<f32>)
+ outs(% arg0 : memref<f32>) {
+ ^bb1 : linalg.yield % f0 : f32 ^ bb2 : linalg.yield % f0 : f32
}
}
// -----
-func.func @generic_empty_region(%arg0: memref<f32>) {
- %f0 = arith.constant 0.0: f32
- // expected-error @+1 {{op expects to have 1 region with 1 block}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ],
- iterator_types = []}
- ins(%arg0 : memref<f32>)
- outs(%arg0 : memref<f32>) {
- }
+func.func @generic_empty_region(% arg0 : memref<f32>) {
+ % f0 = arith
+ .constant 0.0
+ : f32
+ // expected-error @+1 {{op expects to have 1 region with 1 block}}
+ linalg.generic{indexing_maps =
+ [ affine_map<()->()>, affine_map<()->()> ],
+ iterator_types = []} ins(% arg0 : memref<f32>)
+ outs(% arg0 : memref<f32>) {}
}
// -----
-func.func @generic_mismatched_num_arguments(%arg0: memref<f32>) {
- // expected-error @+6 {{'linalg.yield' op expected number of yield values (1) to match the number of inits / outs operands of the enclosing LinalgOp (2)}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
- iterator_types = []}
- outs(%arg0, %arg0 : memref<f32>, memref<f32>) {
- ^bb(%f: f32):
- linalg.yield %f: f32
+func.func @generic_mismatched_num_arguments(% arg0 : memref<f32>) {
+ // expected-error @+6 {{'linalg.yield' op expected number of yield values (1)
+ // to match the number of inits / outs operands of the enclosing LinalgOp
+ // (2)}}
+ linalg.generic{indexing_maps = [ affine_map<()->()>, affine_map<()->()> ],
+ iterator_types = []} outs(% arg0, % arg0 : memref<f32>,
+ memref<f32>) {
+ ^bb(% f : f32) : linalg.yield % f : f32
}
}
// -----
-func.func @generic_shaped_operand_block_arg_type(%arg0: memref<f32>) {
- // expected-error @+6 {{'linalg.yield' op type of yield operand 1 ('i1') doesn't match the element type of the enclosing linalg.generic op ('f32')}}
- linalg.generic {
- indexing_maps = [ affine_map<() -> ()> ],
- iterator_types = []}
- outs(%arg0 : memref<f32>) {
- ^bb(%i: i1):
- linalg.yield %i : i1
+func.func @generic_shaped_operand_block_arg_type(% arg0 : memref<f32>) {
+ // expected-error @+6 {{'linalg.yield' op type of yield operand 1 ('i1')
+ // doesn't match the element type of the enclosing linalg.generic op ('f32')}}
+ linalg.generic{indexing_maps = [affine_map<()->()>],
+ iterator_types = []} outs(% arg0 : memref<f32>) {
+ ^bb(% i : i1) : linalg.yield % i : i1
}
}
@@ -241,14 +218,13 @@ func.func @generic_scalar_operand_block_arg_type(%arg0: tensor<f32>) {
// -----
func.func @generic_result_0_element_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- // expected-error @+7 {{type of yield operand 1 ('i1') doesn't match the element type of the enclosing linalg.generic op ('f32')}}
+ // expected-error @+7 {{type of yield operand 1 ('i1') doesn't match the
+ // element type of the enclosing linalg.generic op ('f32')}}
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(%i: f32):
- %0 = arith.constant 0: i1
- linalg.yield %0: i1
+ ^bb(% i : f32) : % 0 = arith.constant 0 : i1 linalg.yield % 0 : i1
}
}
@@ -664,83 +640,96 @@ func.func @invalid_static_2d_conv(%input : memref<1x3x4x2xf32>, %filter: memref<
iterator_types = ["parallel"]
}
-func.func @invalid_reverse(%A: memref<5xf32>, %B: memref<5xf32>) {
- // expected-error @+1 {{unexpected result less than 0 at expression #0 in}}
- linalg.generic #attrs ins(%A: memref<5xf32>) outs(%B: memref<5xf32>) {
- ^bb0(%a: f32, %b: f32):
- linalg.yield %a : f32
- }
- return
-}
-
-// -----
-
-func.func @map_binary_wrong_yield_operands(
- %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
- -> tensor<64xf32> {
- %add = linalg.map
- ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
- %0 = arith.addf %lhs_elem, %rhs_elem: f32
- // expected-error @+1{{'linalg.yield' op expected number of yield values (2) to match the number of inits / outs operands of the enclosing LinalgOp (1)}}
- linalg.yield %0, %0: f32, f32
- }
- func.return %add : tensor<64xf32>
-}
-
-// -----
-
-func.func @map_input_mapper_arity_mismatch(
- %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
- -> tensor<64xf32> {
- // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}}
- %add = linalg.map
- ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
- (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) {
- %0 = arith.addf %lhs_elem, %rhs_elem: f32
- linalg.yield %0: f32
- }
- func.return %add : tensor<64xf32>
-}
-
-// -----
-
-func.func @map_input_mapper_type_mismatch(
- %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
- -> tensor<64xf32> {
- // expected-error@+1{{'linalg.map' op expected element type of input 'f32' to match bbArg type 'f64'}}
- %add = linalg.map
- ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
- (%lhs_elem: f64, %rhs_elem: f64) {
- %0 = arith.addf %lhs_elem, %rhs_elem: f64
- linalg.yield %0: f64
- }
- func.return %add : tensor<64xf32>
-}
-
-// -----
-
-func.func @map_input_output_shape_mismatch(
- %lhs: tensor<64x64xf32>, %rhs: tensor<64x64xf32>, %init: tensor<32xf32>)
- -> tensor<32xf32> {
- // expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to match shape of output (32)}}
- %add = linalg.map
- ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>)
- outs(%init:tensor<32xf32>)
- (%lhs_elem: f32, %rhs_elem: f32) {
- %0 = arith.addf %lhs_elem, %rhs_elem: f32
- linalg.yield %0: f32
- }
- func.return %add : tensor<32xf32>
-}
+func.func @invalid_reverse(% A : memref<5xf32>, % B : memref<5xf32>){
+ // expected-error @+1 {{unexpected result less than 0 at expression #0 in}}
+ linalg.generic #attrs ins(% A : memref<5xf32>) outs(% B : memref<5xf32>){
+ ^bb0(% a : f32, % b : f32) : linalg.yield % a : f32
+ } return }
+
+// -----
+
+func.func @map_binary_wrong_yield_operands(% lhs : tensor<64xf32>,
+ % rhs : tensor<64xf32>,
+ % init : tensor<64xf32>)
+ ->tensor<64xf32>{
+ % add =
+ linalg
+ .map ins(% lhs, % rhs : tensor<64xf32>, tensor<64xf32>) outs(
+ % init : tensor<64xf32>)(% lhs_elem : f32, % rhs_elem : f32){
+ % 0 = arith.addf % lhs_elem,
+ %
+ rhs_elem : f32
+ // expected-error @+1{{'linalg.yield' op expected number
+ // of yield values (2) to match the number of inits /
+ // outs operands of the enclosing LinalgOp (1)}}
+ linalg.yield %
+ 0,
+ % 0 : f32,
+ f32
+ } func.return %
+ add : tensor<64xf32>
+ }
+
+// -----
+
+func.func @map_input_mapper_arity_mismatch(% lhs : tensor<64xf32>,
+ % rhs : tensor<64xf32>,
+ % init : tensor<64xf32>)
+ ->tensor<64xf32>{
+ // expected-error@+1{{'linalg.map' op expects number of operands to match
+ // the arity of mapper, but got: 2 and 3}}
+ % add = linalg
+ .map ins(% lhs, % rhs : tensor<64xf32>, tensor<64xf32>)
+ outs(% init : tensor<64xf32>)(% lhs_elem : f32,
+ % rhs_elem : f32,
+ % extra_elem : f32){
+ % 0 = arith.addf % lhs_elem,
+ % rhs_elem : f32 linalg.yield % 0 : f32
+ } func.return %
+ add : tensor<64xf32>
+ }
+
+// -----
+
+func.func @map_input_mapper_type_mismatch(% lhs : tensor<64xf32>,
+ % rhs : tensor<64xf32>,
+ % init : tensor<64xf32>)
+ ->tensor<64xf32>{
+ // expected-error@+1{{'linalg.map' op expected element type of input 'f32'
+ // to match bbArg type 'f64'}}
+ % add = linalg
+ .map ins(% lhs, % rhs : tensor<64xf32>, tensor<64xf32>)
+ outs(% init : tensor<64xf32>)(% lhs_elem : f64,
+ % rhs_elem : f64){
+ % 0 = arith.addf % lhs_elem,
+ % rhs_elem : f64 linalg.yield % 0 : f64
+ } func.return %
+ add : tensor<64xf32>
+ }
+
+// -----
+
+func.func @map_input_output_shape_mismatch(% lhs : tensor<64x64xf32>,
+ % rhs : tensor<64x64xf32>,
+ % init : tensor<32xf32>)
+ ->tensor<32xf32>{
+ // expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to
+ // match shape of output (32)}}
+ % add = linalg
+ .map ins(% lhs, % rhs : tensor<64x64xf32>, tensor<64x64xf32>)
+ outs(% init : tensor<32xf32>)(% lhs_elem : f32,
+ % rhs_elem : f32){
+ % 0 = arith.addf % lhs_elem,
+ % rhs_elem : f32 linalg.yield % 0 : f32
+ } func.return %
+ add : tensor<32xf32>
+ }
// -----
func.func @map_no_operands1() {
- // expected-error @+1 {{'linalg.map' op expected 1 or more operands, but found 0}}
+ // expected-error @+1 {{'linalg.map' op expected 1 or more operands, but found
+ // 0}}
linalg.map { arith.addf }
}
@@ -1676,7 +1665,6 @@ func.func @pack_source_dest_type_mismatch_1(%source: tensor<128x256xf32>, %dest:
return
}
-
// -----
func.func @pack_source_dest_type_mismatch_2(%source: memref<128x256xf32>, %dest: tensor<8x16x8x32xf32>) {
@@ -1699,7 +1687,7 @@ func.func @unpack_source_dest_type_mismatch_1(%source: tensor<16x8x8x32xf32>, %d
func.func @unpack_source_dest_type_mismatch_1(%source: memref<16x8x8x32xf32>, %dest: tensor<128x256xf32>) {
// expected-error@+1 {{mixing tensor and buffer semantics is not allowed}}
- %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
- into %dest : memref<16x8x8x32xf32> -> tensor<128x256xf32>
- return
+ % 0 = linalg.unpack % source inner_dims_pos = [ 0, 1 ] inner_tiles =
+ [ 8, 32 ] into %
+ dest : memref<16x8x8x32xf32>->tensor<128x256xf32> return
}
>From 021f88098a8649b2be30c068b8889a998a26362f Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Sun, 13 Apr 2025 12:48:01 +0900
Subject: [PATCH 30/32] nit
---
mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h | 2 +-
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 9 ---------
mlir/test/Dialect/Linalg/invalid.mlir | 2 +-
3 files changed, 2 insertions(+), 11 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
index 99c80a2196567..3af89a6ab3799 100644
--- a/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
+++ b/mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h
@@ -451,7 +451,7 @@ getLinearizedDimensions(ArrayRef<ReassociationIndices> reassociationIndices);
/// %4 = tensor.extract_slice %0 [%3#0, %3#1, %3#2, 0] [1, 1, 1, 10] [1, 1, 1, 1] :
/// tensor<3x7x11x10xf32> to tensor<1x1x1x10xf32>
///
-/// %5 = tensor.collapse_shape %4 [[0, 1, 2], [3]] :
+/// %5 = tensor.collapse_shape %4 [[0, 1, 2], [3]] :
/// tensor<1x1x1x10xf32> into tensor<1x10xf32>
/// %6 = tensor.insert_slice %5 into %arg0 [%iv, 0] [1, 10] [1, 1] :
/// tensor<1x10xf32> into tensor<10x10xf32>
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index f01e2f96e19d6..2aff7b67ce6dd 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -5042,15 +5042,6 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
// Insert a cast if needed
if (needUpdateDestType) {
rewriter.setInsertionPointAfter(packOp);
- // if (hasTensorSemantics) {
- // auto castOp =
- // rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
- // rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
- // } else {
- // auto castOp =
- // rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
- // rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
- // }
Operation *castOp;
if (hasTensorSemantics) {
castOp =
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index 8177f1ee98584..852180aa28055 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -662,7 +662,7 @@ func.func @map_binary_wrong_yield_operands(% lhs : tensor<64xf32>,
// expected-error @+1{{'linalg.yield' op expected number
// of yield values (2) to match the number of inits /
// outs operands of the enclosing LinalgOp (1)}}
- linalg.yield %
+ linalg.yield %
0,
% 0 : f32,
f32
>From 3115f4c096fae9e88f9ffcb71e6b26615b46c430 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Sun, 13 Apr 2025 13:41:10 +0900
Subject: [PATCH 31/32] revert
---
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 9 +-
mlir/test/Dialect/Linalg/invalid.mlir | 342 ++++++++++++-----------
2 files changed, 182 insertions(+), 169 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 2aff7b67ce6dd..82eb513ff940c 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -5013,10 +5013,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
return success();
}
- // Insert tensor.cast if static shape inference is available..
- bool hasTensorSemantics = packOp.hasPureTensorSemantics();
-
- // TODO: support memref.cast if static shape inference is available.
+ // Insert tensor.cast ops if static shape inference is available..
SmallVector<int64_t> srcShape, destShape;
if (inferStaticShape(packOp, srcShape, destShape)) {
Location loc = packOp.getLoc();
@@ -5043,6 +5040,7 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
if (needUpdateDestType) {
rewriter.setInsertionPointAfter(packOp);
Operation *castOp;
+ bool hasTensorSemantics = packOp.hasPureTensorSemantics();
if (hasTensorSemantics) {
castOp =
rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
@@ -5051,6 +5049,9 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
rewriter.create<memref::CastOp>(loc, originalResultType, packOp);
}
rewriter.replaceAllUsesExcept(packOp, castOp->getResult(0), castOp);
+ } else {
+ // TODO: support memref.cast if static shape inference is available.
+ return failure();
}
return success();
}
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index 852180aa28055..b25d81c71ae1f 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -32,74 +32,90 @@ func.func @index_parent() {
// -----
func.func @index_dim_lower_than_number_of_loops(%arg0: memref<f32>) {
- // expected-error @+6 {{op expected dim (2) to be lower than the number of
- // loops (0) of the enclosing LinalgOp}}
- linalg.generic{indexing_maps = [affine_map<()->()>],
- iterator_types = []} outs(% arg0 : memref<f32>) {
- ^bb(% 0 : f32) : linalg.index 2 : index linalg.yield % 0 : f32
+ // expected-error @+6 {{op expected dim (2) to be lower than the number of loops (0) of the enclosing LinalgOp}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()> ],
+ iterator_types = []}
+ outs(%arg0 : memref<f32>) {
+ ^bb(%0: f32):
+ linalg.index 2 : index
+ linalg.yield %0 : f32
}
}
// -----
-func.func @index_dim_negative(% arg0 : memref<f32>) {
- // expected-error @+6 {{op attribute 'dim' failed to satisfy constraint:
- // 64-bit signless integer attribute whose minimum value is 0}}
- linalg.generic{indexing_maps = [affine_map<()->()>],
- iterator_types = []} outs(% arg0 : memref<f32>) {
- ^bb(% 0 : f32) : linalg.index - 1 : index linalg.yield % 0 : f32
+func.func @index_dim_negative(%arg0: memref<f32>) {
+ // expected-error @+6 {{op attribute 'dim' failed to satisfy constraint: 64-bit signless integer attribute whose minimum value is 0}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()> ],
+ iterator_types = []}
+ outs(%arg0 : memref<f32>) {
+ ^bb(%0: f32):
+ linalg.index -1 : index
+ linalg.yield %0 : f32
}
}
// -----
-func.func @generic_no_region(% arg0 : memref<f32>){
- // expected-error @+4 {{expected '{' to begin a region}}
- linalg.generic{indexing_maps = [affine_map<()->(0)>],
- iterator_types = []} ins(% arg0 : memref<f32>)}
+func.func @generic_no_region(%arg0: memref<f32>) {
+ // expected-error @+4 {{expected '{' to begin a region}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> (0)> ],
+ iterator_types = []
+ } ins(%arg0 : memref<f32>)
+}
// -----
-func.func @generic_mismatched_num_returns(% arg0 : memref<f32>) {
- // expected-error @+6 {{op expected number of yield values (0) to match the
- // number of inits / outs operands of the enclosing LinalgOp (1)}}
- linalg.generic{indexing_maps = [affine_map<()->()>],
- iterator_types = []} outs(% arg0 : memref<f32>) {
- ^bb(% 0 : f32) : linalg.yield
+func.func @generic_mismatched_num_returns(%arg0: memref<f32>) {
+ // expected-error @+6 {{op expected number of yield values (0) to match the number of inits / outs operands of the enclosing LinalgOp (1)}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()> ],
+ iterator_types = []}
+ outs(%arg0 : memref<f32>) {
+ ^bb(%0: f32):
+ linalg.yield
}
}
// -----
-func.func @generic_wrong_dim_in_map(% arg0 : memref<1xi32>) {
- // expected-error @+1 {{op expected indexing_map #0 to have 1 dim(s) to match
- // the number of loops}}
- linalg.generic{indexing_maps = [affine_map<()->(0)>],
- iterator_types = ["parallel"]} outs(% arg0 : memref<1xi32>) {
- ^bb(% i : i32) : linalg.yield % i : i32
+func.func @generic_wrong_dim_in_map(%arg0: memref<1xi32>) {
+ // expected-error @+1 {{op expected indexing_map #0 to have 1 dim(s) to match the number of loops}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> (0)> ],
+ iterator_types = ["parallel"]}
+ outs(%arg0 : memref<1xi32>) {
+ ^bb(%i : i32):
+ linalg.yield %i : i32
}
}
// -----
-func.func @generic_wrong_iterator(% arg0 : memref<1xi32>) {
+func.func @generic_wrong_iterator(%arg0: memref<1xi32>) {
// expected-error @+4 {{unexpected iterator_type (random)}}
- linalg.generic{indexing_maps = [affine_map<(i)->(i)>],
- iterator_types = ["random"]} outs(% arg0 : memref<1xi32>) {
- ^bb(% i : i32) : linalg.yield % i : i32
+ linalg.generic {
+ indexing_maps = [ affine_map<(i) -> (i)> ],
+ iterator_types = ["random"]}
+ outs(%arg0 : memref<1xi32>) {
+ ^bb(%i : i32):
+ linalg.yield %i : i32
}
}
// -----
func.func @generic_one_d_view(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- // expected-error @+1 {{expected operand rank (1) to match the result rank of
- // indexing_map #0 (2)}}
+ // expected-error @+1 {{expected operand rank (1) to match the result rank of indexing_map #0 (2)}}
linalg.generic {
indexing_maps = [ affine_map<() -> (0, 0)> ],
iterator_types = []}
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(% f : f32) : linalg.yield % f : f32
+ ^bb(%f : f32):
+ linalg.yield %f: f32
}
}
@@ -113,20 +129,22 @@ func.func @generic_scalar_view(%arg0: memref<?xf32, affine_map<(i)[off]->(off +
iterator_types = []}
ins(%cst : f32)
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(% 0 : f32, % 1 : f32) : linalg.yield % 0 : f32
+ ^bb(%0 : f32, %1 : f32):
+ linalg.yield %0: f32
}
}
// -----
func.func @generic_result_0_element_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- // expected-error @+7 {{'linalg.yield' op type of yield operand 1 ('i4')
- // doesn't match the element type of the enclosing linalg.generic op ('f32')}}
+ // expected-error @+7 {{'linalg.yield' op type of yield operand 1 ('i4') doesn't match the element type of the enclosing linalg.generic op ('f32')}}
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(% 0 : f32) : % 1 = arith.constant 1 : i4 linalg.yield % 1 : i4
+ ^bb(%0: f32):
+ %1 = arith.constant 1: i4
+ linalg.yield %1: i4
}
}
@@ -142,7 +160,8 @@ func.func @generic_singular_maps(%arg0: memref<?xf32, affine_map<(i)[off]->(off
iterator_types = ["parallel","parallel"]}
ins(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>)
outs(%arg1 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(% 0 : f32, % 1 : f32) : linalg.yield % 1 : f32
+ ^bb(%0: f32, %1: f32):
+ linalg.yield %1: f32
}
}
@@ -152,53 +171,57 @@ func.func @generic_singular_maps(%arg0: memref<?xf32, affine_map<(i)[off]->(off
// -----
-func.func @generic_empty_region(% arg0 : memref<f32>) {
- % f0 = arith
- .constant 0.0
- : f32
- // expected-error @+1 {{op expects region #0 to have 0 or 1 blocks}}
- linalg.generic{indexing_maps =
- [ affine_map<()->()>, affine_map<()->()> ],
- iterator_types = []} ins(% arg0 : memref<f32>)
- outs(% arg0 : memref<f32>) {
- ^bb1 : linalg.yield % f0 : f32 ^ bb2 : linalg.yield % f0 : f32
+func.func @generic_empty_region(%arg0: memref<f32>) {
+ %f0 = arith.constant 0.0: f32
+ // expected-error @+1 {{op expects region #0 to have 0 or 1 blocks}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
+ iterator_types = []}
+ ins(%arg0 : memref<f32>)
+ outs(%arg0 : memref<f32>) {
+ ^bb1:
+ linalg.yield %f0: f32
+ ^bb2:
+ linalg.yield %f0: f32
}
}
// -----
-func.func @generic_empty_region(% arg0 : memref<f32>) {
- % f0 = arith
- .constant 0.0
- : f32
- // expected-error @+1 {{op expects to have 1 region with 1 block}}
- linalg.generic{indexing_maps =
- [ affine_map<()->()>, affine_map<()->()> ],
- iterator_types = []} ins(% arg0 : memref<f32>)
- outs(% arg0 : memref<f32>) {}
+func.func @generic_empty_region(%arg0: memref<f32>) {
+ %f0 = arith.constant 0.0: f32
+ // expected-error @+1 {{op expects to have 1 region with 1 block}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ],
+ iterator_types = []}
+ ins(%arg0 : memref<f32>)
+ outs(%arg0 : memref<f32>) {
+ }
}
// -----
-func.func @generic_mismatched_num_arguments(% arg0 : memref<f32>) {
- // expected-error @+6 {{'linalg.yield' op expected number of yield values (1)
- // to match the number of inits / outs operands of the enclosing LinalgOp
- // (2)}}
- linalg.generic{indexing_maps = [ affine_map<()->()>, affine_map<()->()> ],
- iterator_types = []} outs(% arg0, % arg0 : memref<f32>,
- memref<f32>) {
- ^bb(% f : f32) : linalg.yield % f : f32
+func.func @generic_mismatched_num_arguments(%arg0: memref<f32>) {
+ // expected-error @+6 {{'linalg.yield' op expected number of yield values (1) to match the number of inits / outs operands of the enclosing LinalgOp (2)}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
+ iterator_types = []}
+ outs(%arg0, %arg0 : memref<f32>, memref<f32>) {
+ ^bb(%f: f32):
+ linalg.yield %f: f32
}
}
// -----
-func.func @generic_shaped_operand_block_arg_type(% arg0 : memref<f32>) {
- // expected-error @+6 {{'linalg.yield' op type of yield operand 1 ('i1')
- // doesn't match the element type of the enclosing linalg.generic op ('f32')}}
- linalg.generic{indexing_maps = [affine_map<()->()>],
- iterator_types = []} outs(% arg0 : memref<f32>) {
- ^bb(% i : i1) : linalg.yield % i : i1
+func.func @generic_shaped_operand_block_arg_type(%arg0: memref<f32>) {
+ // expected-error @+6 {{'linalg.yield' op type of yield operand 1 ('i1') doesn't match the element type of the enclosing linalg.generic op ('f32')}}
+ linalg.generic {
+ indexing_maps = [ affine_map<() -> ()> ],
+ iterator_types = []}
+ outs(%arg0 : memref<f32>) {
+ ^bb(%i: i1):
+ linalg.yield %i : i1
}
}
@@ -218,13 +241,14 @@ func.func @generic_scalar_operand_block_arg_type(%arg0: tensor<f32>) {
// -----
func.func @generic_result_0_element_type(%arg0: memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- // expected-error @+7 {{type of yield operand 1 ('i1') doesn't match the
- // element type of the enclosing linalg.generic op ('f32')}}
+ // expected-error @+7 {{type of yield operand 1 ('i1') doesn't match the element type of the enclosing linalg.generic op ('f32')}}
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
outs(%arg0 : memref<?xf32, affine_map<(i)[off]->(off + i)>>) {
- ^bb(% i : f32) : % 0 = arith.constant 0 : i1 linalg.yield % 0 : i1
+ ^bb(%i: f32):
+ %0 = arith.constant 0: i1
+ linalg.yield %0: i1
}
}
@@ -640,96 +664,83 @@ func.func @invalid_static_2d_conv(%input : memref<1x3x4x2xf32>, %filter: memref<
iterator_types = ["parallel"]
}
-func.func @invalid_reverse(% A : memref<5xf32>, % B : memref<5xf32>){
- // expected-error @+1 {{unexpected result less than 0 at expression #0 in}}
- linalg.generic #attrs ins(% A : memref<5xf32>) outs(% B : memref<5xf32>){
- ^bb0(% a : f32, % b : f32) : linalg.yield % a : f32
- } return }
-
-// -----
-
-func.func @map_binary_wrong_yield_operands(% lhs : tensor<64xf32>,
- % rhs : tensor<64xf32>,
- % init : tensor<64xf32>)
- ->tensor<64xf32>{
- % add =
- linalg
- .map ins(% lhs, % rhs : tensor<64xf32>, tensor<64xf32>) outs(
- % init : tensor<64xf32>)(% lhs_elem : f32, % rhs_elem : f32){
- % 0 = arith.addf % lhs_elem,
- %
- rhs_elem : f32
- // expected-error @+1{{'linalg.yield' op expected number
- // of yield values (2) to match the number of inits /
- // outs operands of the enclosing LinalgOp (1)}}
- linalg.yield %
- 0,
- % 0 : f32,
- f32
- } func.return %
- add : tensor<64xf32>
- }
-
-// -----
-
-func.func @map_input_mapper_arity_mismatch(% lhs : tensor<64xf32>,
- % rhs : tensor<64xf32>,
- % init : tensor<64xf32>)
- ->tensor<64xf32>{
- // expected-error@+1{{'linalg.map' op expects number of operands to match
- // the arity of mapper, but got: 2 and 3}}
- % add = linalg
- .map ins(% lhs, % rhs : tensor<64xf32>, tensor<64xf32>)
- outs(% init : tensor<64xf32>)(% lhs_elem : f32,
- % rhs_elem : f32,
- % extra_elem : f32){
- % 0 = arith.addf % lhs_elem,
- % rhs_elem : f32 linalg.yield % 0 : f32
- } func.return %
- add : tensor<64xf32>
- }
-
-// -----
-
-func.func @map_input_mapper_type_mismatch(% lhs : tensor<64xf32>,
- % rhs : tensor<64xf32>,
- % init : tensor<64xf32>)
- ->tensor<64xf32>{
- // expected-error@+1{{'linalg.map' op expected element type of input 'f32'
- // to match bbArg type 'f64'}}
- % add = linalg
- .map ins(% lhs, % rhs : tensor<64xf32>, tensor<64xf32>)
- outs(% init : tensor<64xf32>)(% lhs_elem : f64,
- % rhs_elem : f64){
- % 0 = arith.addf % lhs_elem,
- % rhs_elem : f64 linalg.yield % 0 : f64
- } func.return %
- add : tensor<64xf32>
- }
-
-// -----
-
-func.func @map_input_output_shape_mismatch(% lhs : tensor<64x64xf32>,
- % rhs : tensor<64x64xf32>,
- % init : tensor<32xf32>)
- ->tensor<32xf32>{
- // expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to
- // match shape of output (32)}}
- % add = linalg
- .map ins(% lhs, % rhs : tensor<64x64xf32>, tensor<64x64xf32>)
- outs(% init : tensor<32xf32>)(% lhs_elem : f32,
- % rhs_elem : f32){
- % 0 = arith.addf % lhs_elem,
- % rhs_elem : f32 linalg.yield % 0 : f32
- } func.return %
- add : tensor<32xf32>
- }
+func.func @invalid_reverse(%A: memref<5xf32>, %B: memref<5xf32>) {
+ // expected-error @+1 {{unexpected result less than 0 at expression #0 in}}
+ linalg.generic #attrs ins(%A: memref<5xf32>) outs(%B: memref<5xf32>) {
+ ^bb0(%a: f32, %b: f32):
+ linalg.yield %a : f32
+ }
+ return
+}
+
+// -----
+
+func.func @map_binary_wrong_yield_operands(
+ %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
+ -> tensor<64xf32> {
+ %add = linalg.map
+ ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
+ outs(%init:tensor<64xf32>)
+ (%lhs_elem: f32, %rhs_elem: f32) {
+ %0 = arith.addf %lhs_elem, %rhs_elem: f32
+ // expected-error @+1{{'linalg.yield' op expected number of yield values (2) to match the number of inits / outs operands of the enclosing LinalgOp (1)}}
+ linalg.yield %0, %0: f32, f32
+ }
+ func.return %add : tensor<64xf32>
+}
+
+// -----
+
+func.func @map_input_mapper_arity_mismatch(
+ %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
+ -> tensor<64xf32> {
+ // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}}
+ %add = linalg.map
+ ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
+ outs(%init:tensor<64xf32>)
+ (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) {
+ %0 = arith.addf %lhs_elem, %rhs_elem: f32
+ linalg.yield %0: f32
+ }
+ func.return %add : tensor<64xf32>
+}
+
+// -----
+
+func.func @map_input_mapper_type_mismatch(
+ %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>)
+ -> tensor<64xf32> {
+ // expected-error@+1{{'linalg.map' op expected element type of input 'f32' to match bbArg type 'f64'}}
+ %add = linalg.map
+ ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
+ outs(%init:tensor<64xf32>)
+ (%lhs_elem: f64, %rhs_elem: f64) {
+ %0 = arith.addf %lhs_elem, %rhs_elem: f64
+ linalg.yield %0: f64
+ }
+ func.return %add : tensor<64xf32>
+}
+
+// -----
+
+func.func @map_input_output_shape_mismatch(
+ %lhs: tensor<64x64xf32>, %rhs: tensor<64x64xf32>, %init: tensor<32xf32>)
+ -> tensor<32xf32> {
+ // expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to match shape of output (32)}}
+ %add = linalg.map
+ ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>)
+ outs(%init:tensor<32xf32>)
+ (%lhs_elem: f32, %rhs_elem: f32) {
+ %0 = arith.addf %lhs_elem, %rhs_elem: f32
+ linalg.yield %0: f32
+ }
+ func.return %add : tensor<32xf32>
+}
// -----
func.func @map_no_operands1() {
- // expected-error @+1 {{'linalg.map' op expected 1 or more operands, but found
- // 0}}
+ // expected-error @+1 {{'linalg.map' op expected 1 or more operands, but found 0}}
linalg.map { arith.addf }
}
@@ -1665,6 +1676,7 @@ func.func @pack_source_dest_type_mismatch_1(%source: tensor<128x256xf32>, %dest:
return
}
+
// -----
func.func @pack_source_dest_type_mismatch_2(%source: memref<128x256xf32>, %dest: tensor<8x16x8x32xf32>) {
@@ -1687,7 +1699,7 @@ func.func @unpack_source_dest_type_mismatch_1(%source: tensor<16x8x8x32xf32>, %d
func.func @unpack_source_dest_type_mismatch_1(%source: memref<16x8x8x32xf32>, %dest: tensor<128x256xf32>) {
// expected-error@+1 {{mixing tensor and buffer semantics is not allowed}}
- % 0 = linalg.unpack % source inner_dims_pos = [ 0, 1 ] inner_tiles =
- [ 8, 32 ] into %
- dest : memref<16x8x8x32xf32>->tensor<128x256xf32> return
-}
+ %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+ into %dest : memref<16x8x8x32xf32> -> tensor<128x256xf32>
+ return
+}
\ No newline at end of file
>From 4557fdeda980fabed1b0da75b52339ca1b4a93c4 Mon Sep 17 00:00:00 2001
From: Hyunsung Lee <ita9naiwa at gmail.com>
Date: Sun, 13 Apr 2025 13:43:41 +0900
Subject: [PATCH 32/32] revert
---
mlir/test/Dialect/Linalg/canonicalize.mlir | 235 +++++----------------
1 file changed, 55 insertions(+), 180 deletions(-)
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index eafbb99caecaa..8ad008d8bbebd 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -1,42 +1,30 @@
-// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file |
-// FileCheck %s
+// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s
// CHECK-LABEL: func @memref_cast(
-func.func @memref_cast(% a : index, % b : index)->memref < ? x ? xf32>{
- % c0 = arith.constant 0 : index %
- c1 = arith.constant 1 : index %
- c8 = arith.constant 8 : index %
- c16 = arith.constant 16 : index %
- 1 = memref.alloc(% b) : memref <
- ? xi8 > % 2 = memref.view % 1 [% c0][]
- : memref < ? xi8 > to memref<16x16xf32> %
- 3 = memref.cast % 2
- : memref<16x16xf32> to memref <
- ? x
- ? xf32 >
-
- // CHECK: linalg.matmul
- // ins({{.*}}memref<16x16xf32>,
- // memref<16x16xf32>)
- // outs({{.*}}memref<16x16xf32>)
- linalg.matmul ins(
- % 3, % 3 : memref < ? x ? xf32 >,
- memref < ? x
- ? xf32 >)
- outs(% 3 : memref <
- ? x ? xf32 >) return % 3
- : memref <
- ? x
- ? xf32 > }
+func.func @memref_cast(%a: index, %b: index) -> memref<?x?xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c8 = arith.constant 8 : index
+ %c16 = arith.constant 16 : index
+ %1 = memref.alloc (%b) : memref<?xi8>
+ %2 = memref.view %1[%c0][] : memref<?xi8> to memref<16x16xf32>
+ %3 = memref.cast %2 : memref<16x16xf32> to memref<?x?xf32>
+
+ // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>)
+ linalg.matmul ins(%3, %3: memref<?x?xf32>, memref<?x?xf32>)
+ outs(%3: memref<?x?xf32>)
+ return %3: memref<?x?xf32>
+}
// -----
#accesses = [
- affine_map<(i)->(i)>]
+ affine_map<(i) -> (i)>
+]
#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
+ indexing_maps = #accesses,
+ iterator_types = ["parallel"]
}
func.func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf32> {
@@ -129,7 +117,7 @@ func.func @linalg_effects(
// -----
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
-> (tensor<?x?x?xf32>, tensor<?x?x?xf32>) {
%c0 = arith.constant 0 : index
@@ -156,7 +144,7 @@ func.func @remove_no_op(%arg0 : tensor<?x?x?xf32>, %arg1 : tensor<?x?x?xf32>)
// -----
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
-> tensor<1x2x3xf32> {
%out = tensor.empty() : tensor<1x2x3xf32>
@@ -172,12 +160,12 @@ func.func @remove_no_op_mismatched_types(%arg0 : tensor<?x?x?xf32>)
}
// CHECK-LABEL: func @remove_no_op_mismatched_types
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?x?xf32>
-// CHECK: %[[CAST:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to
-// tensor<1x2x3xf32> CHECK: return %[[CAST]]
+// CHECK: %[[CAST:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<1x2x3xf32>
+// CHECK: return %[[CAST]]
// -----
-#map = affine_map < ()->()>
+#map = affine_map<() -> ()>
func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
%out = tensor.empty() : tensor<f32>
%g = linalg.generic {
@@ -195,7 +183,7 @@ func.func @cant_fold_to_tensor_cast(%arg0 : f32) -> tensor<f32> {
// -----
-#map = affine_map < (d0, d1)->(d0, d1)>
+#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
@@ -220,7 +208,7 @@ func.func @keep_not_noop(%arg0 : tensor<?x?xf32>) -> tensor<?x?xf32> {
// -----
-#map = affine_map < (d0, d1)->(d0, d1)>
+#map = affine_map<(d0, d1) -> (d0, d1)>
func.func @keep_not_noop(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?xf32>)
-> (tensor<?x?xf32>, tensor<?x?xf32>) {
%c0 = arith.constant 0 : index
@@ -398,7 +386,7 @@ func.func @fill_pack_general() -> tensor<1x1x8x4x4x8xi32>{
// -----
-#map = affine_map < ()[s0]->(s0 ceildiv 16)>
+#map = affine_map<()[s0] -> (s0 ceildiv 16)>
func.func @dynamic_fill_pack(%arg0: tensor<?x?xf32>) -> tensor<?x?x16x16xf32> {
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
@@ -507,15 +495,11 @@ func.func @no_fold_pad_fill_value_mismatch() -> tensor<412x276xf32> {
// -----
-// Tests below verify whether static information is propagated through all the
-// operands of generic op.
-// 1. If one of the inputs of generic op has static info and it has no cast
-// source.
-// 2. If one of the inputs of generic op has static info and it is coming from
-// tensr.cast operation.
-// 3. If one of the outputs of generic op has static info and it is coming from
-// tenso.cast operation.
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+// Tests below verify whether static information is propagated through all the operands of generic op.
+// 1. If one of the inputs of generic op has static info and it has no cast source.
+// 2. If one of the inputs of generic op has static info and it is coming from tensor.cast operation.
+// 3. If one of the outputs of generic op has static info and it is coming from tensor.cast operation.
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @static_input_without_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
@@ -545,7 +529,7 @@ func.func @static_input_without_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x
// -----
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @static_input_with_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?xf32>) -> tensor<2x3x4xf32> {
@@ -576,7 +560,7 @@ func.func @static_input_with_cast(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<?x?x?
// -----
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @static_output_with_cast
// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<?x?x?xf32>, %[[ARG2:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
@@ -608,9 +592,9 @@ func.func @static_output_with_cast(%arg0 : tensor<?x?x?xf32>, %arg1: tensor<?x?x
// -----
-// This test checks the folding of tensor.cast operation when the source value
-// of cast has more static information than the destination value.
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+// This test checks the folding of tensor.cast operation when the source value of cast
+// has more static information than the destination value.
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @cast_source
// CHECK-SAME: (%[[ARG0:.*]]: tensor<2x3x4xf32>, %[[ARG1:.*]]: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> tensor<2x3x4xf32> {
@@ -641,7 +625,7 @@ func.func @cast_source(%arg0 : tensor<2x3x4xf32>, %arg1: tensor<2x3x4xf32>) -> t
// -----
-#map = affine_map < (d0, d1, d2)->(d0, d1, d2)>
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
// CHECK-LABEL: func @cast_dest
// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?x?xf32>, %[[ARG1:.*]]: tensor<1x?x?xf32>,
func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2: index, %arg3: index, %arg4: index) -> tensor<?x?x?xf32> {
@@ -665,34 +649,6 @@ func.func @cast_dest(%arg0: tensor<?x?x?xf32>, %arg1: tensor<1x?x?xf32>, %arg2:
// -----
-#map = affine_map < (d0, d1)->(d0, d1)>
-#sparse = #sparse_tensor.encoding < \
- {map = (d0, d1)->(d0 : dense, d1 : compressed) }>
-// CHECK-DAG: #[[$SPARSE:.+]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>
-// CHECK-LABEL: func @static_shape_inference_with_encoding(
-// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]
-// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]
-func.func @static_shape_inference_with_encoding(%arg0: tensor<?x?xf32, #sparse>, %arg1: tensor<?x?xf32>) -> tensor<3x4xf32> {
- %0 = tensor.empty() : tensor<3x4xf32>
- %1 = linalg.generic {
- indexing_maps = [#map, #map, #map],
- iterator_types = ["parallel", "parallel"]
- } ins(%arg0, %arg1 : tensor<?x?xf32, #sparse>, tensor<?x?xf32>)
- outs(%0 : tensor<3x4xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %2 = arith.addf %in, %in_0 : f32
- linalg.yield %2 : f32
- } -> tensor<3x4xf32>
- return %1 : tensor<3x4xf32>
- // CHECK: %[[CAST_ARG0:.*]] = tensor.cast %[[ARG0]] : tensor<?x?xf32, #[[$SPARSE]]> to tensor<3x4xf32, #[[$SPARSE]]>
- // CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor<?x?xf32> to tensor<3x4xf32>
- // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
- // CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<3x4xf32, #[[$SPARSE]]>, tensor<3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<3x4xf32>)
-}
-
-// -----
-
// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 1)>
// CHECK-LABEL: func @insert_pad_into_fill
// CHECK-SAME: (%[[INPUT:.+]]: tensor<?x?x?xf32>, %[[LOW0:.+]]: index, %[[LOW1:.+]]: index, %{{.+}}: index, %{{.+}}: index)
@@ -856,25 +812,23 @@ func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor<?x?xf32>, %arg1 : ten
%0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
scf.if %arg3 {
- % 1 = tensor.cast % 0 : tensor < ? x ? xf32 > to tensor<4x8xf32> func.call
- @some_use(% 1)
- : (tensor<4x8xf32>)->()
+ %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x8xf32>
+ func.call @some_use(%1) : (tensor<4x8xf32>) -> ()
}
- return % 0 : tensor < ? x ? xf32 >
+ return %0 : tensor<?x?xf32>
}
// Check conditionally reachable cast is not folded into producer.
// CHECK-LABEL: func @linalgop_with_cond_cast_consumer
-// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]:
-// tensor<?x?xf32>, %[[ARG2:.*]]: tensor<?x?xf32>, %[[ARG3:.*]]: i1)
-// CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] :
-// tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: (%[[ARG0:.*]]: tensor<?x?xf32>, %[[ARG1:.*]]: tensor<?x?xf32>, %[[ARG2:.*]]: tensor<?x?xf32>, %[[ARG3:.*]]: i1)
+// CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor<?x?xf32>, tensor<?x?xf32>)
// CHECK-SAME: outs(%[[ARG2]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: scf.if %[[ARG3]] {
-// CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor<?x?xf32> to
-// tensor<4x8xf32> CHECK: func.call @some_use(%[[CAST]]) :
-// (tensor<4x8xf32>) -> () CHECK: } CHECK: return %[[RES]] :
-// tensor<?x?xf32>
+// CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor<?x?xf32> to tensor<4x8xf32>
+// CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> ()
+// CHECK: }
+// CHECK: return %[[RES]] : tensor<?x?xf32>
+
// -----
@@ -923,19 +877,17 @@ func.func @fold_multi_use_generic_op_with_consumer(%arg0 : tensor<?x?x?xf32>) ->
// CHECK: func @fold_multi_use_generic_op_with_consumer
// CHECK-SAME: %[[ARG0:.+]]: tensor<?x?x?xf32>
// CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<2x3x4xf32>
-// CHECK-DAG: %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to
-// tensor<4x3x2xf32> CHECK-DAG: %[[INIT2:.+]] = tensor.empty() :
-// tensor<3x2x4xf32>
+// CHECK-DAG: %[[CAST:.+]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<4x3x2xf32>
+// CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: ins(%[[CAST]] :
// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] :
-// CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 :
-// tensor<3x2x4xf32> to tensor<?x?x?xf32> CHECK: return
-// %[[RETURN_CAST]], %[[GENERIC]]#1
+// CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor<?x?x?xf32>
+// CHECK: return %[[RETURN_CAST]], %[[GENERIC]]#1
// -----
-#map = affine_map < (d0)->(d0)>
+#map = affine_map<(d0) -> (d0)>
func.func @identity_buffer(%arg0 : memref<?xf32>, %arg1: memref<?xf32>) {
linalg.generic {
indexing_maps = [#map, #map],
@@ -959,7 +911,7 @@ func.func @identity_buffer(%arg0 : memref<?xf32>, %arg1: memref<?xf32>) {
// -----
-#map = affine_map < (d0, d1)->(d1, d0)>
+#map = affine_map<(d0, d1) -> (d1, d0)>
func.func @erase_non_identity_noop(%arg0 : tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic {
indexing_maps = [#map, #map],
@@ -1807,81 +1759,4 @@ func.func @fold_cast_unpack_dynamic_tile_size(
inner_tiles = [%c8, 1]
into %res {test_attr} : tensor<1x1x?x1xi32> -> tensor<7x?xi32>
return %unpack : tensor<7x?xi32>
-}
-
-// -----
-
-//===----------------------------------------------------------------------===//
-// linalg.unpack + tensor.extract_slice
-//===----------------------------------------------------------------------===//
-
-func.func @fold_extract_slice_into_unpack(
- %src : tensor<28x2x?x16x16xf32>, %dest : tensor<28x32x?xf32>, %size : index
-) -> tensor<28x28x?xf32> {
- %unpack = linalg.unpack %src
- outer_dims_perm = [0, 1, 2]
- inner_dims_pos = [1, 2]
- inner_tiles = [16, 16]
- into %dest : tensor<28x2x?x16x16xf32> -> tensor<28x32x?xf32>
- %extracted_slice = tensor.extract_slice %unpack
- [0, 0, 0] [28, 28, %size] [1, 1, 1] : tensor<28x32x?xf32> to tensor<28x28x?xf32>
- return %extracted_slice : tensor<28x28x?xf32>
-}
-
-// CHECK-LABEL: func @fold_extract_slice_into_unpack
-// CHECK-SAME: %[[SRC:.+]]: tensor<28x2x?x16x16xf32>
-// CHECK-SAME: %[[DEST:.+]]: tensor<28x32x?xf32>
-// CHECK-SAME: %[[SIZE:.+]]: index
-// CHECK: %[[DEST_SLICE:.+]] = tensor.extract_slice %[[DEST]]
-// CHECK-SAME: [0, 0, 0] [28, 28, %[[SIZE]]] [1, 1, 1]
-// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SRC]]
-// CHECK-SAME: into %[[DEST_SLICE]]
-// CHECK: return %[[UNPACK]]
-
-// -----
-
-func.func @no_fold_extract_slice_into_unpack_rank_reducing(
- %src : tensor<28x2x16xf32>, %dest : tensor<28x32xf32>
-) -> tensor<28xf32> {
- %unpack = linalg.unpack %src
- outer_dims_perm = [0, 1]
- inner_dims_pos = [1]
- inner_tiles = [16]
- into %dest : tensor<28x2x16xf32> -> tensor<28x32xf32>
- %extracted_slice = tensor.extract_slice %unpack
- [0, 0] [1, 28] [1, 1] : tensor<28x32xf32> to tensor<28xf32>
- return %extracted_slice : tensor<28xf32>
-}
-
-// CHECK-LABEL: func @no_fold_extract_slice_into_unpack_rank_reducing
-// CHECK-SAME: %[[SRC:.+]]: tensor<28x2x16xf32>
-// CHECK-SAME: %[[DEST:.+]]: tensor<28x32xf32>
-// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SRC]]
-// CHECK-SAME: into %[[DEST]]
-// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
-// CHECK: return %[[SLICE]]
-
-// -----
-
-func.func @no_fold_extract_slice_into_unpack_non_zero_offset(
- %src : tensor<28x2x16xf32>, %dest : tensor<28x32xf32>
-) -> tensor<28x28xf32> {
- % unpack =
- linalg.unpack % src outer_dims_perm =
- [ 0, 1 ] inner_dims_pos = [1] inner_tiles =
- [16] into % dest : tensor<28x2x16xf32>->tensor<28x32xf32> %
- extracted_slice =
- tensor.extract_slice %
- unpack[0, 1][28, 28][1, 1] : tensor<28x32xf32> to
- tensor<28x28xf32> return %
- extracted_slice
- : tensor<28x28xf32>
-}
-
-// CHECK-LABEL: func @no_fold_extract_slice_into_unpack_non_zero_offset
-// CHECK-SAME: %[[SRC:.+]]: tensor<28x2x16xf32>
-// CHECK-SAME: %[[DEST:.+]]: tensor<28x32xf32>
-// CHECK: %[[UNPACK:.+]] = linalg.unpack %[[SRC]]
-// CHECK-SAME: into %[[DEST]]
-// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[UNPACK]]
-// CHECK: return %[[SLICE]]
+}
\ No newline at end of file
More information about the Mlir-commits
mailing list