[Mlir-commits] [mlir] 9d25d5c - [mlir][linalg] Generic to category specialization (#184624)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Mar 6 06:35:05 PST 2026
Author: Adam Siemieniuk
Date: 2026-03-06T15:35:00+01:00
New Revision: 9d25d5c4a7f7840ef7ea75633757469148f12ebf
URL: https://github.com/llvm/llvm-project/commit/9d25d5c4a7f7840ef7ea75633757469148f12ebf
DIFF: https://github.com/llvm/llvm-project/commit/9d25d5c4a7f7840ef7ea75633757469148f12ebf.diff
LOG: [mlir][linalg] Generic to category specialization (#184624)
Adds initial support for the generic-to-category linalg morphism.
Only conversion to the contraction op (`linalg.contract`) is supported for now.
Added:
mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-category-ops.mlir
mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-named-ops.mlir
Modified:
mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
mlir/include/mlir/Dialect/Linalg/Passes.td
mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp
mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp
mlir/test/Dialect/Linalg/specialize-generic-ops.mlir
Removed:
mlir/test/Dialect/Linalg/roundtrip-linalg-named-ops.mlir
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index 4948bfffad5e0..5998f736ced34 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -889,6 +889,13 @@ def ContractOp : LinalgStructuredBase_Op<"contract", [
let skipDefaultBuilders = 1;
let builders = [
+ OpBuilder<
+ (ins "ValueRange":$inputs, "ValueRange":$outputs,
+ CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes),
+ [{
+ buildStructuredOp($_builder, $_state, std::nullopt, inputs, outputs,
+ attributes, regionBuilder);
+ }]>,
OpBuilder<(ins "TypeRange":$resultTensorTypes, "ValueRange":$inputs,
"ValueRange":$outputs, "ArrayAttr":$indexingMaps,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes),
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index f48ea9849e237..26638b2a644c4 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -70,8 +70,10 @@ def LinalgMorphOpsPass : Pass<"linalg-morph-ops"> {
// Specialization path is not guaranteed.
Option<"genericToNamed", "generic-to-named", "bool", /*default=*/"false",
- "convert linalg.generic to equivalent named ops"> ];
- // TODOs: `generic-to-category`, `category-to-named`
+ "convert linalg.generic to equivalent named ops">,
+ Option<"genericToCategory", "generic-to-category", "bool", /*default=*/"false",
+ "convert linalg.generic to equivalent category ops"> ];
+ // TODOs: `category-to-named`
}
def LinalgGeneralizeNamedOpsPass : Pass<"linalg-generalize-named-ops">,
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index fb9cede670801..dcb7f1f212207 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -923,10 +923,15 @@ FailureOr<GenericOp> interchangeGenericOp(RewriterBase &rewriter,
FailureOr<GenericOp> generalizeNamedOp(RewriterBase &rewriter,
LinalgOp linalgOp);
-/// Create a namedOp from the given GenericOp and replace the GenericOp.
-/// Currently we can specialize only trivial linalg copy operations.
-FailureOr<LinalgOp> specializeGenericOp(RewriterBase &rewriter,
- GenericOp genericOp);
+struct GenericOpSpecializationOptions {
+ // Specialize generics to category ops (default: named ops).
+ bool emitCategoryOps = false;
+};
+
+/// Replace the given GenericOp with a namedOp or categoryOp.
+FailureOr<LinalgOp>
+specializeGenericOp(RewriterBase &rewriter, GenericOp genericOp,
+ const GenericOpSpecializationOptions &options = {});
/// Create a new buffer using the `allocationFn` provided. The size of this
/// buffer is either the original subview size when 'useOriginalSubviewSize' is
@@ -1718,17 +1723,24 @@ struct LinalgGeneralizationPattern
};
struct LinalgSpecializationPattern : public OpRewritePattern<GenericOp> {
- using OpRewritePattern<GenericOp>::OpRewritePattern;
+
+ LinalgSpecializationPattern(
+ MLIRContext *context, const GenericOpSpecializationOptions &options = {},
+ PatternBenefit benefit = 1)
+ : OpRewritePattern<GenericOp>(context, benefit), options(options) {}
FailureOr<GenericOp>
returningMatchAndRewrite(GenericOp op, PatternRewriter &rewriter) const {
- return specializeGenericOp(rewriter, op);
+ return specializeGenericOp(rewriter, op, options);
}
LogicalResult matchAndRewrite(GenericOp op,
PatternRewriter &rewriter) const override {
return returningMatchAndRewrite(op, rewriter);
}
+
+private:
+ GenericOpSpecializationOptions options;
};
/// Vectorization pattern for memref::CopyOp.
@@ -1938,13 +1950,14 @@ void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns);
void populateLinalgNamedOpsGeneralizationPatterns(RewritePatternSet &patterns);
/// Populates `patterns` with patterns to convert linalg.generic ops to named
-/// ops where possible. A linalg.generic can represent wide range and complex
-/// computations for which equivalent linalg named op may not exist e.g.
-/// linalg.generic that takes a tensor and computes a polynomial such as:
+/// or category ops where possible. A linalg.generic can represent wide range
+/// and complex computations for which equivalent linalg named op may not exist
+/// e.g. linalg.generic that takes a tensor and computes a polynomial such as:
/// p(x) = an*x^n + ... + a1x + a0
/// There is no equivalent named op to convert to. Many such cases exist.
void populateLinalgGenericOpsSpecializationPatterns(
- RewritePatternSet &patterns);
+ RewritePatternSet &patterns,
+ const GenericOpSpecializationOptions &options = {});
/// Populates `patterns` that convert linalg named ops e.g. `linalg.add`
/// to equivalent `linalg.elementwise`.
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index bfc03cc7436df..37b549a7fcd7f 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -193,14 +193,17 @@ static void buildMatmulOp(OpBuilder &b, OperationState &state,
ValueRange inputs, ValueRange outputs,
ArrayRef<NamedAttribute> attributes,
RegionBuilderFn regionBuilder,
- ArrayRef<AffineMap> indexingMaps) {
- // Initialize indexingMaps attribute, for MatmulOp.
- SmallVector<Attribute, 3> indexingMapsAttrVal;
- indexingMapsAttrVal =
- llvm::map_to_vector(indexingMaps, [](AffineMap map) -> Attribute {
- return AffineMapAttr::get(map);
- });
- state.addAttribute("indexing_maps", b.getArrayAttr(indexingMapsAttrVal));
+ ArrayRef<AffineMap> defaultIndexingMaps) {
+ // If indexing maps are not provided, apply the default ones.
+ if (none_of(attributes, [](NamedAttribute attr) {
+ return attr.getName() == "indexing_maps";
+ })) {
+ SmallVector<Attribute, 3> indexingMapsAttrVal;
+ indexingMapsAttrVal = llvm::map_to_vector(
+ defaultIndexingMaps,
+ [](AffineMap map) -> Attribute { return AffineMapAttr::get(map); });
+ state.addAttribute("indexing_maps", b.getArrayAttr(indexingMapsAttrVal));
+ }
return buildStructuredOp(b, state, resultTensorTypes, inputs, outputs,
attributes, regionBuilder);
}
@@ -210,14 +213,17 @@ static void buildBatchMatmulOp(OpBuilder &b, OperationState &state,
ValueRange inputs, ValueRange outputs,
ArrayRef<NamedAttribute> attributes,
RegionBuilderFn regionBuilder,
- ArrayRef<AffineMap> indexingMaps) {
- // Initialize indexingMaps attribute, for BatchMatmulOp.
- SmallVector<Attribute, 4> indexingMapsAttrVal;
- indexingMapsAttrVal =
- llvm::map_to_vector(indexingMaps, [](AffineMap map) -> Attribute {
- return AffineMapAttr::get(map);
- });
- state.addAttribute("indexing_maps", b.getArrayAttr(indexingMapsAttrVal));
+ ArrayRef<AffineMap> defaultIndexingMaps) {
+ // If indexing maps are not provided, apply the default ones.
+ if (none_of(attributes, [](NamedAttribute attr) {
+ return attr.getName() == "indexing_maps";
+ })) {
+ SmallVector<Attribute, 4> indexingMapsAttrVal;
+ indexingMapsAttrVal = llvm::map_to_vector(
+ defaultIndexingMaps,
+ [](AffineMap map) -> Attribute { return AffineMapAttr::get(map); });
+ state.addAttribute("indexing_maps", b.getArrayAttr(indexingMapsAttrVal));
+ }
return buildStructuredOp(b, state, resultTensorTypes, inputs, outputs,
attributes, regionBuilder);
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp
index f261ccb1415fe..fee293647deda 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp
@@ -44,16 +44,16 @@ void LinalgMorphOpsPass::runOnOperation() {
RewritePatternSet patterns(&getContext());
// Lowering paths (named -> category -> generic)
- if (namedToCategory) {
+ if (namedToCategory)
populateLinalgNamedToElementwisePatterns(patterns);
- }
- if (namedToGeneric || categoryToGeneric) {
+ if (namedToGeneric || categoryToGeneric)
populateLinalgNamedOpsGeneralizationPatterns(patterns);
- }
// Lifting paths (named <- category <- generic)
- if (genericToNamed) {
- populateLinalgGenericOpsSpecializationPatterns(patterns);
+ if (genericToNamed || genericToCategory) {
+ GenericOpSpecializationOptions opts;
+ opts.emitCategoryOps = genericToCategory;
+ populateLinalgGenericOpsSpecializationPatterns(patterns, opts);
}
if (failed(applyPatternsGreedily(getOperation(), std::move(patterns))))
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp
index b4de2bb1e1169..ee9fc77961bab 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp
@@ -141,18 +141,28 @@ template <typename NamedOpTy>
static LinalgOp replaceWithMatmulVariant(RewriterBase &rewriter, GenericOp op,
std::optional<TypeFn> castTy,
ArrayRef<AffineMap> indexingMaps) {
- SmallVector<NamedAttribute> castAttrVec;
+ SmallVector<NamedAttribute> attributes;
// Only explicitly specify the cast attribute for unsigned cast; signed is
// the default for linalg.matmul/linalg.batch_matmul.
- if (castTy.has_value() && *castTy == TypeFn::cast_unsigned)
- castAttrVec = {rewriter.getNamedAttr(
- "cast", TypeFnAttr::get(rewriter.getContext(), *castTy))};
+ if (castTy.has_value() && *castTy == TypeFn::cast_unsigned) {
+ auto castAttr = rewriter.getNamedAttr(
+ "cast", TypeFnAttr::get(rewriter.getContext(), *castTy));
+ attributes.push_back(castAttr);
+ }
- auto namedOp = rewriter.replaceOpWithNewOp<NamedOpTy>(
+ // Set the original generic's maps to preserve operand indexing semantics like
+ // transposition.
+ SmallVector<Attribute, 3> indexingMapsAttrVal =
+ llvm::map_to_vector(indexingMaps, [](AffineMap map) -> Attribute {
+ return AffineMapAttr::get(map);
+ });
+ auto indexingMapsAttr = rewriter.getNamedAttr(
+ "indexing_maps", rewriter.getArrayAttr(indexingMapsAttrVal));
+ attributes.push_back(indexingMapsAttr);
+
+ LinalgOp namedOp = rewriter.replaceOpWithNewOp<NamedOpTy>(
op, ValueRange{op.getDpsInputs()[0], op.getDpsInputs()[1]},
- ValueRange{op.getDpsInits()[0]}, castAttrVec);
-
- namedOp.setIndexingMapsAttr(rewriter.getAffineMapArrayAttr(indexingMaps));
+ ValueRange{op.getDpsInits()[0]}, attributes);
return namedOp;
}
@@ -208,7 +218,8 @@ static std::optional<TypeFn> getCastTypeForMatmulLikeOp(GenericOp genericOp) {
// Converts linalg.generic to named linalg.*matmul* where possible.
static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
- GenericOp genericOp) {
+ GenericOp genericOp,
+ bool emitCategoryOp) {
if (genericOp.getNumDpsInputs() != 2 || genericOp.getNumDpsInits() != 1)
return failure();
@@ -218,6 +229,31 @@ static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
[](AffineMap m) { return !m.isProjectedPermutation(); }))
return failure();
+ // Only mul+add contraction is supported.
+ // Currently, there is no way to control the contraction body type in named
+ // and category ops which all default to mul+add only.
+ if (!mlir::linalg::detail::isContractionBody(
+ *genericOp.getBlock(), [](Operation *first, Operation *second) {
+ return (isa<arith::MulFOp>(first) && isa<arith::AddFOp>(second)) ||
+ (isa<arith::MulIOp>(first) && isa<arith::AddIOp>(second)) ||
+ (isa<complex::MulOp>(first) && isa<complex::AddOp>(second));
+ }))
+ return failure();
+
+ // Determine the cast type for the named matmul op, or bail out if casts
+ // cannot be represented by the named op.
+ std::optional<TypeFn> castTy = getCastTypeForMatmulLikeOp(genericOp);
+ if (!castTy)
+ return rewriter.notifyMatchFailure(
+ genericOp, "contains invalid cast ops for the named matmul op");
+
+ // In case of category op, wider range of variants is supported.
+ if (emitCategoryOp)
+ return replaceWithMatmulVariant<ContractOp>(
+ rewriter, genericOp, castTy, genericOp.getIndexingMapsArray());
+
+ // Further checks for named variants.
+ //
// Linalg generic contraction can be across multiple axis e.g.
// ```
// linalg.generic
@@ -244,14 +280,6 @@ static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
if (dims.m.size() != 1 || dims.n.size() != 1 || dims.k.size() != 1)
return failure();
- if (!mlir::linalg::detail::isContractionBody(
- *genericOp.getBlock(), [](Operation *first, Operation *second) {
- return (isa<arith::MulFOp>(first) && isa<arith::AddFOp>(second)) ||
- (isa<arith::MulIOp>(first) && isa<arith::AddIOp>(second)) ||
- (isa<complex::MulOp>(first) && isa<complex::AddOp>(second));
- }))
- return failure();
-
// Check rank of operands
auto indexingMaps = genericOp.getIndexingMapsArray();
if (llvm::any_of(indexingMaps, [&dims](AffineMap m) {
@@ -290,13 +318,6 @@ static FailureOr<LinalgOp> specializeLinalgContractions(RewriterBase &rewriter,
if (llvm::is_contained({a, b, c}, IndexMatchResult::Mismatch))
return failure();
- // Determine the cast type for the named matmul op, or bail out if casts
- // cannot be represented by the named op.
- std::optional<TypeFn> castTy = getCastTypeForMatmulLikeOp(genericOp);
- if (!castTy)
- return rewriter.notifyMatchFailure(
- genericOp, "contains invalid cast ops for the named matmul op");
-
// Build indexing maps for the named op in its canonical dimension ordering
auto *ctx = genericOp.getContext();
unsigned numLoopDims = numOfBatchDims + 3;
@@ -431,8 +452,22 @@ static FailureOr<LinalgOp> specializeLinalgConvolutions(RewriterBase &rewriter,
//===----------------------------------------------------------------------===//
// Categorize linalg generic to named op where possible.
//===----------------------------------------------------------------------===//
-FailureOr<LinalgOp> mlir::linalg::specializeGenericOp(RewriterBase &rewriter,
- GenericOp genericOp) {
+FailureOr<LinalgOp> mlir::linalg::specializeGenericOp(
+ RewriterBase &rewriter, GenericOp genericOp,
+ const GenericOpSpecializationOptions &options) {
+ // Contraction - e.g. matmul
+ if (isaContractionOpInterface(genericOp)) {
+ return specializeLinalgContractions(rewriter, genericOp,
+ options.emitCategoryOps);
+ }
+
+ // Early exit in case of category specialization.
+ // TODO: Remove when matches for other ops account for both named and
+ // category.
+ if (options.emitCategoryOps)
+ return rewriter.notifyMatchFailure(
+ genericOp, "no matching category op specialization");
+
// Copy
if (isaCopyOpInterface(genericOp)) {
LinalgOp namedOp = rewriter.replaceOpWithNewOp<CopyOp>(
@@ -501,16 +536,12 @@ FailureOr<LinalgOp> mlir::linalg::specializeGenericOp(RewriterBase &rewriter,
}
}
- // Contraction - e.g. matmul
- if (isaContractionOpInterface(genericOp)) {
- return specializeLinalgContractions(rewriter, genericOp);
- }
-
// Convolution - e.g. *conv/pooling*
- if (isaConvolutionOpInterface(genericOp)) {
+ if (isaConvolutionOpInterface(genericOp))
return specializeLinalgConvolutions(rewriter, genericOp);
- }
- return failure();
+
+ return rewriter.notifyMatchFailure(genericOp,
+ "no matching named op specialization");
}
namespace {
@@ -534,6 +565,7 @@ void LinalgSpecializeGenericOpsPass::runOnOperation() {
}
void mlir::linalg::populateLinalgGenericOpsSpecializationPatterns(
- RewritePatternSet &patterns) {
- patterns.add<LinalgSpecializationPattern>(patterns.getContext());
+ RewritePatternSet &patterns,
+ const GenericOpSpecializationOptions &options) {
+ patterns.add<LinalgSpecializationPattern>(patterns.getContext(), options);
}
diff --git a/mlir/test/Dialect/Linalg/roundtrip-linalg-named-ops.mlir b/mlir/test/Dialect/Linalg/roundtrip-linalg-named-ops.mlir
deleted file mode 100644
index f15ae646e5765..0000000000000
--- a/mlir/test/Dialect/Linalg/roundtrip-linalg-named-ops.mlir
+++ /dev/null
@@ -1,72 +0,0 @@
-// The following test examples of linalg named ops lowered to linalg.generic and then
-// lifted back up to named op.
-// RUN: mlir-opt %s -linalg-generalize-named-ops | mlir-opt --linalg-specialize-generic-ops | FileCheck %s
-
-func.func @unary_exp(%A: memref<7x14x21xf32>, %Out: memref<7x14x21xf32>) {
- linalg.exp ins(%A : memref<7x14x21xf32>) outs(%Out : memref<7x14x21xf32>)
- return
-}
-
-// CHECK-LABEL: unary_exp
-// CHECK-SAME: %[[A:.+]]: memref<7x14x21xf32>, %[[Out:.+]]: memref<7x14x21xf32>)
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.exp ins(%[[A]] : memref<7x14x21xf32>) outs(%[[Out]] : memref<7x14x21xf32>)
-
-// -----
-
-func.func @binary_add(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = linalg.add ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: binary_add
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.add ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
-
-// -----
-
-
-///----------------------------------------------------------------------------------------
-/// Tests for linalg.matmul
-///----------------------------------------------------------------------------------------
-
-func.func @matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %0 = linalg.matmul ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: @matmul
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
-
-// -----
-
-// Check matmul with unsigned cast is correctly raised back to named op.
-func.func @matmul_unsigned_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi64>,
- %Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
- %0 = linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi64>)
- outs(%Out : tensor<16x32xi32>) -> tensor<16x32xi32>
- return %0 : tensor<16x32xi32>
-}
-
-// CHECK-LABEL: @matmul_unsigned_cast
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
-
-// -----
-
-func.func @mixed_named_ops(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
- %C: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
- %AB = linalg.matmul ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
- %1 = linalg.add ins(%AB, %C : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
- return %1 : tensor<?x?xf32>
-}
-
-// CHECK-LABEL: @mixed_named_ops
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[C:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: %[[AB:.+]] = linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK: linalg.add ins(%[[AB]], %[[C]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-category-ops.mlir b/mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-category-ops.mlir
new file mode 100644
index 0000000000000..d5e49a866eaec
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-category-ops.mlir
@@ -0,0 +1,107 @@
+// The following test examples of linalg category ops lowered to linalg.generic
+// and then lifted back up to category op.
+
+// RUN: mlir-opt %s -split-input-file -linalg-morph-ops=category-to-generic \
+// RUN: | mlir-opt -split-input-file -linalg-morph-ops=generic-to-category \
+// RUN: | FileCheck %s
+
+#map = affine_map<(d0, d1, d2) -> (d0, d2)>
+#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
+#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
+func.func @contract_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
+ %arg2: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.contract indexing_maps = [#map, #map1, #map2]
+ ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// CHECK-LABEL: contract_matmul
+// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[OUT:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.contract
+// CHECK-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+
+func.func @contract_matmul_memref(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>,
+ %arg2: memref<?x?xf32>) {
+ linalg.contract indexing_maps = [#map, #map1, #map2]
+ ins(%arg0, %arg1 : memref<?x?xf32>, memref<?x?xf32>)
+ outs(%arg2 : memref<?x?xf32>)
+ return
+}
+
+// CHECK-LABEL: contract_matmul_memref
+// CHECK-SAME: %[[A:.+]]: memref<?x?xf32>, %[[B:.+]]: memref<?x?xf32>,
+// CHECK-SAME: %[[OUT:.+]]: memref<?x?xf32>)
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.contract
+// CHECK-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CHECK-SAME: ins(%[[A]], %[[B]] : memref<?x?xf32>, memref<?x?xf32>)
+// CHECK-SAME: outs(%[[OUT]] : memref<?x?xf32>)
+
+func.func @contract_matmul_bitcast_int_to_float(%arg0: tensor<16x8xi32>,
+ %arg1: tensor<8x32xi32>, %arg2: tensor<16x32xf32>) -> tensor<16x32xf32> {
+ %0 = linalg.contract indexing_maps = [#map, #map1, #map2]
+ ins(%arg0, %arg1 : tensor<16x8xi32>, tensor<8x32xi32>)
+ outs(%arg2 : tensor<16x32xf32>) -> tensor<16x32xf32>
+ return %0 : tensor<16x32xf32>
+}
+
+// CHECK-LABEL: contract_matmul_bitcast_int_to_float
+// CHECK-SAME: %[[A:.+]]: tensor<16x8xi32>, %[[B:.+]]: tensor<8x32xi32>,
+// CHECK-SAME: %[[OUT:.+]]: tensor<16x32xf32>) -> tensor<16x32xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.contract
+// CHECK-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CHECK-NOT: cast =
+// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<16x8xi32>, tensor<8x32xi32>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<16x32xf32>) -> tensor<16x32xf32>
+
+func.func @contract_matmul_unsigned_cast_float(%arg0: tensor<16x8xi16>,
+ %arg1: tensor<8x32xi16>, %arg2: tensor<16x32xf32>) -> tensor<16x32xf32> {
+ %0 = linalg.contract indexing_maps = [#map, #map1, #map2]
+ {cast = #linalg.type_fn<cast_unsigned>}
+ ins(%arg0, %arg1 : tensor<16x8xi16>, tensor<8x32xi16>)
+ outs(%arg2 : tensor<16x32xf32>) -> tensor<16x32xf32>
+ return %0 : tensor<16x32xf32>
+}
+
+// CHECK-LABEL: contract_matmul_unsigned_cast_float
+// CHECK-SAME: %[[A:.+]]: tensor<16x8xi16>, %[[B:.+]]: tensor<8x32xi16>,
+// CHECK-SAME: %[[OUT:.+]]: tensor<16x32xf32>) -> tensor<16x32xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.contract
+// CHECK-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CHECK-SAME: cast = #linalg.type_fn<cast_unsigned>
+// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<16x8xi16>, tensor<8x32xi16>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<16x32xf32>) -> tensor<16x32xf32>
+
+// -----
+
+#map = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
+#map1 = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
+#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
+func.func @contract_multi_reduction(%arg0: tensor<10x20x30xf32>,
+ %arg1: tensor<30x20x40xf32>, %arg2: tensor<10x40xf32>) -> tensor<10x40xf32> {
+ %0 = linalg.contract indexing_maps = [#map, #map1, #map2]
+ ins(%arg0, %arg1 : tensor<10x20x30xf32>, tensor<30x20x40xf32>)
+ outs(%arg2 : tensor<10x40xf32>) -> tensor<10x40xf32>
+ return %0 : tensor<10x40xf32>
+}
+
+// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
+// CHECK-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2, d1)>
+// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
+
+// CHECK-LABEL: contract_multi_reduction
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.contract
+// CHECK-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
diff --git a/mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-named-ops.mlir b/mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-named-ops.mlir
new file mode 100644
index 0000000000000..19b30ef10da84
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/roundtrip-morphism-linalg-named-ops.mlir
@@ -0,0 +1,99 @@
+// The following test examples of linalg named ops lowered to linalg.generic
+// and then lifted back up to named op.
+
+// RUN: mlir-opt %s -split-input-file -linalg-morph-ops=named-to-generic \
+// RUN: | mlir-opt -split-input-file -linalg-morph-ops=generic-to-named \
+// RUN: | FileCheck %s
+
+func.func @unary_exp(%A: memref<7x14x21xf32>, %Out: memref<7x14x21xf32>) {
+ linalg.exp ins(%A : memref<7x14x21xf32>) outs(%Out : memref<7x14x21xf32>)
+ return
+}
+
+// CHECK-LABEL: unary_exp
+// CHECK-SAME: %[[A:.+]]: memref<7x14x21xf32>, %[[OUT:.+]]: memref<7x14x21xf32>)
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.exp
+// CHECK-SAME: ins(%[[A]] : memref<7x14x21xf32>)
+// CHECK-SAME: outs(%[[OUT]] : memref<7x14x21xf32>)
+
+// -----
+
+func.func @binary_add(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.add
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: binary_add
+// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[OUT:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.add
+// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// -----
+
+
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.matmul
+///----------------------------------------------------------------------------------------
+
+func.func @matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.matmul
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: @matmul
+// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[OUT:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.matmul
+// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// -----
+
+// Check matmul with unsigned cast is correctly raised back to named op.
+func.func @matmul_unsigned_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi64>,
+ %Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
+ %0 = linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi64>)
+ outs(%Out : tensor<16x32xi32>) -> tensor<16x32xi32>
+ return %0 : tensor<16x32xi32>
+}
+
+// CHECK-LABEL: @matmul_unsigned_cast
+// CHECK-NOT: linalg.generic
+// CHECK: linalg.matmul
+// CHECK-SAME: {cast = #linalg.type_fn<cast_unsigned>}
+
+// -----
+
+func.func @mixed_named_ops(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %C: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %AB = linalg.matmul
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %1 = linalg.add
+ ins(%AB, %C : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %1 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: @mixed_named_ops
+// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[C:.+]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[OUT:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: %[[AB:.+]] = linalg.matmul
+// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// CHECK: linalg.add
+// CHECK-SAME: ins(%[[AB]], %[[C]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/specialize-generic-ops.mlir b/mlir/test/Dialect/Linalg/specialize-generic-ops.mlir
index 5c58a5fedd639..029d11a4f60de 100644
--- a/mlir/test/Dialect/Linalg/specialize-generic-ops.mlir
+++ b/mlir/test/Dialect/Linalg/specialize-generic-ops.mlir
@@ -1,10 +1,16 @@
-// RUN: mlir-opt %s -split-input-file --linalg-specialize-generic-ops | FileCheck %s
+// RUN: mlir-opt %s -split-input-file -linalg-morph-ops=generic-to-named \
+// RUN: | FileCheck %s --check-prefix=NAMED,ALL
+
+// RUN: mlir-opt %s -split-input-file -linalg-morph-ops=generic-to-category \
+// RUN: | FileCheck %s --check-prefix=CATEGORY,ALL
#umap = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
func.func @unary_op_exp(%A: tensor<?x?x?xf32>, %Out: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#umap, #umap], iterator_types = ["parallel", "parallel","parallel"]}
- ins(%A : tensor<?x?x?xf32>) outs(%Out : tensor<?x?x?xf32>) {
+ {indexing_maps = [#umap, #umap],
+ iterator_types = ["parallel", "parallel","parallel"]}
+ ins(%A : tensor<?x?x?xf32>)
+ outs(%Out : tensor<?x?x?xf32>) {
^bb0(%in: f32, %out: f32):
%1 = math.exp %in : f32
linalg.yield %1 : f32
@@ -12,18 +18,27 @@ func.func @unary_op_exp(%A: tensor<?x?x?xf32>, %Out: tensor<?x?x?xf32>) -> tenso
return %0 : tensor<?x?x?xf32>
}
-// CHECK-LABEL: unary_op_exp
-// CHECK-SAME: %[[A:.+]]: tensor<?x?x?xf32>, %[[Out:.+]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.exp ins(%[[A]] : tensor<?x?x?xf32>) outs(%[[Out]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+// ALL-LABEL: unary_op_exp
+// ALL-SAME: %[[A:.+]]: tensor<?x?x?xf32>, %[[OUT:.+]]: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.exp
+// NAMED-SAME: ins(%[[A]] : tensor<?x?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+
+// Not supported yet.
+// CATEGORY: linalg.generic
// -----
#map = affine_map<(d0, d1) -> (d0, d1)>
-func.func @binary_op_div(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func.func @binary_op_div(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
+ {indexing_maps = [#map, #map, #map],
+ iterator_types = ["parallel", "parallel"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.divf %in, %in_0 : f32
linalg.yield %1 : f32
@@ -31,10 +46,17 @@ func.func @binary_op_div(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<
return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: binary_op_div
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.div ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// ALL-LABEL: binary_op_div
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>,
+// ALL-SAME: %[[OUT:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.div
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// Not supported yet.
+// CATEGORY: linalg.generic
// -----
@@ -45,22 +67,39 @@ func.func @binary_op_div(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
-func.func @op_matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func.func @op_matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: op_matmul
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CATEGORY-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// ALL-LABEL: op_matmul
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>,
+// ALL-SAME: %[[OUT:.+]]: tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// Cast-auditing tests: ensure we only specialize when the cast semantics can
// be expressed by linalg.matmul, and use the cast attribute when needed.
@@ -69,8 +108,10 @@ func.func @op_matmul(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?x
func.func @op_matmul_unsigned_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi32>,
%Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi32>) outs(%Out : tensor<16x32xi32>) {
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi32>)
+ outs(%Out : tensor<16x32xi32>) {
^bb0(%in: i16, %in_0: i32, %out: i32):
%1 = arith.extui %in : i16 to i32
%3 = arith.muli %1, %in_0 : i32
@@ -80,9 +121,15 @@ func.func @op_matmul_unsigned_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi32>,
return %0 : tensor<16x32xi32>
}
-// CHECK-LABEL: op_matmul_unsigned_cast
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+// ALL-LABEL: op_matmul_unsigned_cast
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CATEGORY-SAME: {cast = #linalg.type_fn<cast_unsigned>}
// Ensures truncation rounding is tolerated with unsigned cases.
// Note: We only consider casts as conflicting if they have different
@@ -94,8 +141,10 @@ func.func @op_matmul_unsigned_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi32>,
func.func @op_matmul_unsigned_cast_and_truncate(%A: tensor<16x8xi16>, %B: tensor<8x32xi64>,
%Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi64>) outs(%Out : tensor<16x32xi32>) {
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi64>)
+ outs(%Out : tensor<16x32xi32>) {
^bb0(%in: i16, %in_0: i64, %out: i32):
%1 = arith.extui %in : i16 to i32
%2 = arith.trunci %in_0 : i64 to i32
@@ -106,70 +155,98 @@ func.func @op_matmul_unsigned_cast_and_truncate(%A: tensor<16x8xi16>, %B: tensor
return %0 : tensor<16x32xi32>
}
-// CHECK-LABEL: op_matmul_unsigned_cast_and_truncate
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+// ALL-LABEL: op_matmul_unsigned_cast_and_truncate
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CATEGORY-SAME: {cast = #linalg.type_fn<cast_unsigned>}
// Signed casts are the default, no cast attribute is required.
func.func @op_matmul_signed_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi16>,
%Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>) outs(%Out : tensor<16x32xi32>) {
- ^bb0(%in: i16, %in_0: i16, %out: i32):
- %1 = arith.extsi %in : i16 to i32
- %2 = arith.extsi %in_0 : i16 to i32
- %3 = arith.muli %1, %2 : i32
- %4 = arith.addi %out, %3 : i32
- linalg.yield %4 : i32
- } -> tensor<16x32xi32>
- return %0 : tensor<16x32xi32>
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>)
+ outs(%Out : tensor<16x32xi32>) {
+ ^bb0(%in: i16, %in_0: i16, %out: i32):
+ %1 = arith.extsi %in : i16 to i32
+ %2 = arith.extsi %in_0 : i16 to i32
+ %3 = arith.muli %1, %2 : i32
+ %4 = arith.addi %out, %3 : i32
+ linalg.yield %4 : i32
+ } -> tensor<16x32xi32>
+ return %0 : tensor<16x32xi32>
}
-// CHECK-LABEL: op_matmul_signed_cast
-// CHECK-NOT: linalg.generic
-// CHECK-NOT: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
-// CHECK: linalg.matmul
+// ALL-LABEL: op_matmul_signed_cast
+
+// NAMED-NOT: linalg.generic
+// NAMED-NOT: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+// NAMED: linalg.matmul
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: %[[RES:.+]] = linalg.contract
+// CATEGORY-SAME: indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CATEGORY-NOT: {cast =
+// CATEGORY-SAME: ins
+// CATEGORY: return %[[RES]]
// Mixed signed/unsigned inputs cannot be encoded with a single cast attribute.
func.func @negative_op_matmul_mixed_cast(%A: tensor<16x8xi16>, %B: tensor<8x32xi16>,
- %Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
+ %Out: tensor<16x32xi32>) -> tensor<16x32xi32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>) outs(%Out : tensor<16x32xi32>) {
- ^bb0(%in: i16, %in_0: i16, %out: i32):
- %1 = arith.extui %in : i16 to i32
- %2 = arith.extsi %in_0 : i16 to i32
- %3 = arith.muli %1, %2 : i32
- %4 = arith.addi %out, %3 : i32
- linalg.yield %4 : i32
- } -> tensor<16x32xi32>
- return %0 : tensor<16x32xi32>
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>)
+ outs(%Out : tensor<16x32xi32>) {
+ ^bb0(%in: i16, %in_0: i16, %out: i32):
+ %1 = arith.extui %in : i16 to i32
+ %2 = arith.extsi %in_0 : i16 to i32
+ %3 = arith.muli %1, %2 : i32
+ %4 = arith.addi %out, %3 : i32
+ linalg.yield %4 : i32
+ } -> tensor<16x32xi32>
+ return %0 : tensor<16x32xi32>
}
-// CHECK-LABEL: negative_op_matmul_mixed_cast
-// CHECK: linalg.generic
-// CHECK-NOT: linalg.matmul
+// ALL-LABEL: negative_op_matmul_mixed_cast
+
+// NAMED: linalg.generic
+// NAMED-NOT: linalg.matmul
+
+// CATEGORY: linalg.generic
+// CATEGORY-NOT: linalg.contract
// Output-side casts are not representable by the named matmul ops.
func.func @negative_op_matmul_output_cast(%A: tensor<16x8xi32>, %B: tensor<8x32xi32>,
- %Out: tensor<16x32xi64>) -> tensor<16x32xi64> {
+ %Out: tensor<16x32xi64>) -> tensor<16x32xi64> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi32>, tensor<8x32xi32>) outs(%Out : tensor<16x32xi64>) {
- ^bb0(%in: i32, %in_0: i32, %out: i64):
- %3 = arith.trunci %out : i64 to i32
- %4 = arith.muli %in, %in_0 : i32
- %5 = arith.addi %3, %4 : i32
- %6 = arith.extsi %5 : i32 to i64
- linalg.yield %6 : i64
- } -> tensor<16x32xi64>
- return %0 : tensor<16x32xi64>
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi32>, tensor<8x32xi32>)
+ outs(%Out : tensor<16x32xi64>) {
+ ^bb0(%in: i32, %in_0: i32, %out: i64):
+ %3 = arith.trunci %out : i64 to i32
+ %4 = arith.muli %in, %in_0 : i32
+ %5 = arith.addi %3, %4 : i32
+ %6 = arith.extsi %5 : i32 to i64
+ linalg.yield %6 : i64
+ } -> tensor<16x32xi64>
+ return %0 : tensor<16x32xi64>
}
-// CHECK-LABEL: negative_op_matmul_output_cast
-// CHECK: linalg.generic
-// CHECK-NOT: linalg.matmul
+// ALL-LABEL: negative_op_matmul_output_cast
+
+// NAMED: linalg.generic
+// NAMED-NOT: linalg.matmul
+
+// CATEGORY: linalg.generic
+// CATEGORY-NOT: linalg.contract
// Bitcasts are not modeled by the cast attribute, but should not block
// specialization.
@@ -180,8 +257,10 @@ func.func @op_matmul_bitcast_int_to_float(%A: tensor<16x8xi32>,
%B: tensor<8x32xi32>,
%Out: tensor<16x32xf32>) -> tensor<16x32xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi32>, tensor<8x32xi32>) outs(%Out : tensor<16x32xf32>) {
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi32>, tensor<8x32xi32>)
+ outs(%Out : tensor<16x32xf32>) {
^bb0(%in: i32, %in_0: i32, %out: f32):
%1 = arith.bitcast %in : i32 to f32
%2 = arith.bitcast %in_0 : i32 to f32
@@ -192,16 +271,22 @@ func.func @op_matmul_bitcast_int_to_float(%A: tensor<16x8xi32>,
return %0 : tensor<16x32xf32>
}
-// CHECK-LABEL: op_matmul_bitcast_int_to_float
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul
+// ALL-LABEL: op_matmul_bitcast_int_to_float
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
// Signed float casts only use sitofp, which defaults to signed semantics.
func.func @op_matmul_signed_cast_float(%A: tensor<16x8xi16>, %B: tensor<8x32xi16>,
%Out: tensor<16x32xf32>) -> tensor<16x32xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>) outs(%Out : tensor<16x32xf32>) {
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>)
+ outs(%Out : tensor<16x32xf32>) {
^bb0(%in: i16, %in_0: i16, %out: f32):
%1 = arith.sitofp %in : i16 to f32
%2 = arith.sitofp %in_0 : i16 to f32
@@ -212,17 +297,25 @@ func.func @op_matmul_signed_cast_float(%A: tensor<16x8xi16>, %B: tensor<8x32xi16
return %0 : tensor<16x32xf32>
}
-// CHECK-LABEL: op_matmul_signed_cast_float
-// CHECK-NOT: linalg.generic
-// CHECK-NOT: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
-// CHECK: linalg.matmul
+// ALL-LABEL: op_matmul_signed_cast_float
+
+// NAMED-NOT: linalg.generic
+// NAMED-NOT: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+// NAMED: linalg.matmul
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY-NOT: linalg.contract{{.*}}{cast =
+// CATEGORY: %[[RES:.+]] = linalg.contract
+// CATEGORY: return %[[RES]]
// Unsigned float casts are expressed via uitofp and use the unsigned cast attr.
func.func @op_matmul_unsigned_cast_float(%A: tensor<16x8xi16>, %B: tensor<8x32xi16>,
%Out: tensor<16x32xf32>) -> tensor<16x32xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>) outs(%Out : tensor<16x32xf32>) {
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xi16>, tensor<8x32xi16>)
+ outs(%Out : tensor<16x32xf32>) {
^bb0(%in: i16, %in_0: i16, %out: f32):
%1 = arith.uitofp %in : i16 to f32
%2 = arith.uitofp %in_0 : i16 to f32
@@ -233,9 +326,13 @@ func.func @op_matmul_unsigned_cast_float(%A: tensor<16x8xi16>, %B: tensor<8x32xi
return %0 : tensor<16x32xf32>
}
-// CHECK-LABEL: op_matmul_unsigned_cast_float
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+// ALL-LABEL: op_matmul_unsigned_cast_float
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul {cast = #linalg.type_fn<cast_unsigned>}
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract{{.*}}{cast = #linalg.type_fn<cast_unsigned>}
// -----
@@ -246,10 +343,13 @@ func.func @op_matmul_unsigned_cast_float(%A: tensor<16x8xi16>, %B: tensor<8x32xi
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-func.func @op_batch_matmul(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>, %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
+func.func @op_batch_matmul(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>,
+ %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x16x16xf32>) {
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<2x16x8xf32>, tensor<2x8x16xf32>)
+ outs(%Out : tensor<2x16x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -258,10 +358,20 @@ func.func @op_batch_matmul(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>, %Out:
return %0 : tensor<2x16x16xf32>
}
-// CHECK-LABEL: op_batch_matmul
-// CHECK-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[Out:.+]]: tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.batch_matmul ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%[[Out]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+// CATEGORY-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+
+// ALL-LABEL: op_batch_matmul
+// ALL-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[OUT:.+]]: tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.batch_matmul ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%[[OUT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// Ensure that the unsigned cast path for cast detection is exercised for
// batch_matmul as well.
@@ -269,38 +379,43 @@ func.func @op_batch_matmul_unsigned_cast(%A: tensor<2x16x8xi16>,
%B: tensor<2x8x16xi64>,
%Out: tensor<2x16x16xi32>) -> tensor<2x16x16xi32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2],
- iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<2x16x8xi16>, tensor<2x8x16xi64>)
- outs(%Out : tensor<2x16x16xi32>) {
- ^bb0(%in: i16, %in_0: i64, %out: i32):
- %1 = arith.extui %in : i16 to i32
- %2 = arith.trunci %in_0 : i64 to i32
- %3 = arith.muli %1, %2 : i32
- %4 = arith.addi %out, %3 : i32
- linalg.yield %4 : i32
- } -> tensor<2x16x16xi32>
- return %0 : tensor<2x16x16xi32>
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<2x16x8xi16>, tensor<2x8x16xi64>)
+ outs(%Out : tensor<2x16x16xi32>) {
+ ^bb0(%in: i16, %in_0: i64, %out: i32):
+ %1 = arith.extui %in : i16 to i32
+ %2 = arith.trunci %in_0 : i64 to i32
+ %3 = arith.muli %1, %2 : i32
+ %4 = arith.addi %out, %3 : i32
+ linalg.yield %4 : i32
+ } -> tensor<2x16x16xi32>
+ return %0 : tensor<2x16x16xi32>
}
-// CHECK-LABEL: op_batch_matmul_unsigned_cast
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.batch_matmul {cast = #linalg.type_fn<cast_unsigned>}
+// ALL-LABEL: op_batch_matmul_unsigned_cast
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.batch_matmul {cast = #linalg.type_fn<cast_unsigned>}
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract indexing_maps = {{\[}}#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]{{\]}}
+// CATEGORY-SAME: {cast = #linalg.type_fn<cast_unsigned>}
// -----
-// This is a multi-reduction linalg.generic and cannot be lifted to matrix multiply
+// A multi-reduction contraction.
#mapA = affine_map<(m, n, k1, k2) -> (m, k1, k2)>
#mapB = affine_map<(m, n, k1, k2) -> (k2, k1, n)>
#mapC = affine_map<(m, n, k1, k2) -> (m, n)>
-func.func @negative_op_multi_reduction(%A: tensor<10x20x30xf32>,
- %B: tensor<30x20x40xf32>,
- %C: tensor<10x40xf32>) -> tensor<10x40xf32> {
+func.func @op_multi_reduction(%A: tensor<10x20x30xf32>,
+ %B: tensor<30x20x40xf32>,
+ %C: tensor<10x40xf32>) -> tensor<10x40xf32> {
%0 = linalg.generic
- {indexing_maps = [#mapA, #mapB, #mapC],
- iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
- ins(%A, %B : tensor<10x20x30xf32>, tensor<30x20x40xf32>)
- outs(%C : tensor<10x40xf32>) {
+ {indexing_maps = [#mapA, #mapB, #mapC],
+ iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
+ ins(%A, %B : tensor<10x20x30xf32>, tensor<30x20x40xf32>)
+ outs(%C : tensor<10x40xf32>) {
^bb0(%a: f32, %b: f32, %c: f32):
%1 = arith.mulf %a, %b : f32
%2 = arith.addf %c, %1 : f32
@@ -309,8 +424,13 @@ func.func @negative_op_multi_reduction(%A: tensor<10x20x30xf32>,
return %0 : tensor<10x40xf32>
}
-// CHECK-LABEL: negative_op_multi_reduction
-// CHECK: linalg.generic
+// ALL-LABEL: op_multi_reduction
+
+// Cannot be lifted to named matrix multiply.
+// NAMED: linalg.generic
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
// -----
@@ -319,11 +439,13 @@ func.func @negative_op_multi_reduction(%A: tensor<10x20x30xf32>,
#mapBni0 = affine_map<(d0, d1, d2, d3) -> (d1, d0, d3)>
#mapBni1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#mapBni2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-func.func @negative_batch_matmul_non_identity_batch(%A: tensor<4x2x8xf32>, %B: tensor<2x8x16xf32>,
- %Out: tensor<2x4x16xf32>) -> tensor<2x4x16xf32> {
+func.func @batch_matmul_non_identity_batch(%A: tensor<4x2x8xf32>, %B: tensor<2x8x16xf32>,
+ %Out: tensor<2x4x16xf32>) -> tensor<2x4x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#mapBni0, #mapBni1, #mapBni2], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<4x2x8xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x4x16xf32>) {
+ {indexing_maps = [#mapBni0, #mapBni1, #mapBni2],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<4x2x8xf32>, tensor<2x8x16xf32>)
+ outs(%Out : tensor<2x4x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -332,28 +454,40 @@ func.func @negative_batch_matmul_non_identity_batch(%A: tensor<4x2x8xf32>, %B: t
return %0 : tensor<2x4x16xf32>
}
-// CHECK-LABEL: negative_batch_matmul_non_identity_batch
-// CHECK: linalg.generic
+// ALL-LABEL: batch_matmul_non_identity_batch
+
+// Cannot be lifted to named matrix multiply.
+// NAMED: linalg.generic
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
// -----
-// TODO: matvec
+// TODO: named matvec
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1)>
#map2 = affine_map<(d0, d1) -> (d0)>
func.func @op_matvec(%A: tensor<?x?xf32>, %B: tensor<?xf32>, %Out: tensor<?xf32>) -> tensor<?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "reduction"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?xf32>) outs(%Out : tensor<?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
+ {indexing_maps = [#map, #map1, #map2],
+ iterator_types = ["parallel", "reduction"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?xf32>)
+ outs(%Out : tensor<?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
} -> tensor<?xf32>
return %0 : tensor<?xf32>
}
-// CHECK-LABEL: op_matvec
-// CHECK: linalg.generic
+
+// ALL-LABEL: op_matvec
+
+// NAMED: linalg.generic
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
// -----
@@ -361,28 +495,36 @@ func.func @op_matvec(%A: tensor<?x?xf32>, %B: tensor<?xf32>, %Out: tensor<?xf32>
#map_ta = affine_map<(d0, d1, d2) -> (d2, d0)>
#map_b = affine_map<(d0, d1, d2) -> (d2, d1)>
#map_c = affine_map<(d0, d1, d2) -> (d0, d1)>
-func.func @op_matmul_transpose_a(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func.func @op_matmul_transpose_a(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_ta, #map_b, #map_c], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_ta, #map_b, #map_c],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-DAG: #[[$MAP_TA:.+]] = affine_map<(d0, d1, d2) -> (d2, d0)>
-// CHECK-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK-LABEL: op_matmul_transpose_a
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_B]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// ALL-DAG: #[[$MAP_TA:.+]] = affine_map<(d0, d1, d2) -> (d2, d0)>
+// ALL-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// ALL-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// ALL-LABEL: op_matmul_transpose_a
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_B]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_B]], #[[$MAP_C]]]
// -----
@@ -390,28 +532,39 @@ func.func @op_matmul_transpose_a(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out:
#map_a = affine_map<(d0, d1, d2) -> (d0, d2)>
#map_tb = affine_map<(d0, d1, d2) -> (d1, d2)>
#map_c = affine_map<(d0, d1, d2) -> (d0, d1)>
-func.func @op_matmul_transpose_b(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func.func @op_matmul_transpose_b(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_a, #map_tb, #map_c], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_a, #map_tb, #map_c],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
-// CHECK-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK-LABEL: op_matmul_transpose_b
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// ALL-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// ALL-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
+// ALL-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// ALL-LABEL: op_matmul_transpose_b
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// -----
@@ -419,10 +572,13 @@ func.func @op_matmul_transpose_b(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %Out:
#map_ta = affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>
#map_b = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map_c = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-func.func @op_batch_matmul_transpose_a(%A: tensor<2x8x4xf32>, %B: tensor<2x8x16xf32>, %Out: tensor<2x4x16xf32>) -> tensor<2x4x16xf32> {
+func.func @op_batch_matmul_transpose_a(%A: tensor<2x8x4xf32>, %B: tensor<2x8x16xf32>,
+ %Out: tensor<2x4x16xf32>) -> tensor<2x4x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_ta, #map_b, #map_c], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<2x8x4xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x4x16xf32>) {
+ {indexing_maps = [#map_ta, #map_b, #map_c],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<2x8x4xf32>, tensor<2x8x16xf32>)
+ outs(%Out : tensor<2x4x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -431,16 +587,24 @@ func.func @op_batch_matmul_transpose_a(%A: tensor<2x8x4xf32>, %B: tensor<2x8x16x
return %0 : tensor<2x4x16xf32>
}
-// CHECK-DAG: #[[$MAP_TA:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>
-// CHECK-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK-LABEL: op_batch_matmul_transpose_a
-// CHECK-SAME: %[[A:.+]]: tensor<2x8x4xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[Out:.+]]: tensor<2x4x16xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.batch_matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_B]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<2x8x4xf32>, tensor<2x8x16xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<2x4x16xf32>) -> tensor<2x4x16xf32>
+// ALL-DAG: #[[$MAP_TA:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d1)>
+// ALL-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
+// ALL-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+
+// ALL-LABEL: op_batch_matmul_transpose_a
+// ALL-SAME: %[[A:.+]]: tensor<2x8x4xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[OUT:.+]]: tensor<2x4x16xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.batch_matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_B]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<2x8x4xf32>, tensor<2x8x16xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<2x4x16xf32>) -> tensor<2x4x16xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_B]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<2x8x4xf32>, tensor<2x8x16xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<2x4x16xf32>) -> tensor<2x4x16xf32>
// -----
@@ -448,10 +612,13 @@ func.func @op_batch_matmul_transpose_a(%A: tensor<2x8x4xf32>, %B: tensor<2x8x16x
#map_a = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map_tb = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
#map_c = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-func.func @op_batch_matmul_transpose_b(%A: tensor<2x4x8xf32>, %B: tensor<2x16x8xf32>, %Out: tensor<2x4x16xf32>) -> tensor<2x4x16xf32> {
+func.func @op_batch_matmul_transpose_b(%A: tensor<2x4x8xf32>, %B: tensor<2x16x8xf32>,
+ %Out: tensor<2x4x16xf32>) -> tensor<2x4x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_a, #map_tb, #map_c], iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<2x4x8xf32>, tensor<2x16x8xf32>) outs(%Out : tensor<2x4x16xf32>) {
+ {indexing_maps = [#map_a, #map_tb, #map_c],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<2x4x8xf32>, tensor<2x16x8xf32>)
+ outs(%Out : tensor<2x4x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -460,16 +627,24 @@ func.func @op_batch_matmul_transpose_b(%A: tensor<2x4x8xf32>, %B: tensor<2x16x8x
return %0 : tensor<2x4x16xf32>
}
-// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-// CHECK-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK-LABEL: op_batch_matmul_transpose_b
-// CHECK-SAME: %[[A:.+]]: tensor<2x4x8xf32>, %[[B:.+]]: tensor<2x16x8xf32>, %[[Out:.+]]: tensor<2x4x16xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.batch_matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<2x4x8xf32>, tensor<2x16x8xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<2x4x16xf32>) -> tensor<2x4x16xf32>
+// ALL-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+// ALL-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
+// ALL-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+
+// ALL-LABEL: op_batch_matmul_transpose_b
+// ALL-SAME: %[[A:.+]]: tensor<2x4x8xf32>, %[[B:.+]]: tensor<2x16x8xf32>, %[[OUT:.+]]: tensor<2x4x16xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.batch_matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<2x4x8xf32>, tensor<2x16x8xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<2x4x16xf32>) -> tensor<2x4x16xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<2x4x8xf32>, tensor<2x16x8xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<2x4x16xf32>) -> tensor<2x4x16xf32>
// -----
@@ -478,28 +653,38 @@ func.func @op_batch_matmul_transpose_b(%A: tensor<2x4x8xf32>, %B: tensor<2x16x8x
#map_tb = affine_map<(d0, d1, d2) -> (d1, d2)>
#map_c = affine_map<(d0, d1, d2) -> (d0, d1)>
func.func @op_matmul_transpose_a_and_b(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
- %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_ta, #map_tb, #map_c], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_ta, #map_tb, #map_c],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-DAG: #[[$MAP_TA:.+]] = affine_map<(d0, d1, d2) -> (d2, d0)>
-// CHECK-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK-LABEL: op_matmul_transpose_a_and_b
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_TB]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// ALL-DAG: #[[$MAP_TA:.+]] = affine_map<(d0, d1, d2) -> (d2, d0)>
+// ALL-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
+// ALL-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// ALL-LABEL: op_matmul_transpose_a_and_b
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_TB]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_TA]], #[[$MAP_TB]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// -----
@@ -508,28 +693,38 @@ func.func @op_matmul_transpose_a_and_b(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
#map_b = affine_map<(d0, d1, d2) -> (d2, d1)>
#map_tc = affine_map<(d0, d1, d2) -> (d1, d0)>
func.func @op_matmul_transposed_output(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
- %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_a, #map_b, #map_tc], iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_a, #map_b, #map_tc],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
-// CHECK-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
-// CHECK-DAG: #[[$MAP_TC:.+]] = affine_map<(d0, d1, d2) -> (d1, d0)>
-// CHECK-LABEL: op_matmul_transposed_output
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_TC]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// ALL-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// ALL-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// ALL-DAG: #[[$MAP_TC:.+]] = affine_map<(d0, d1, d2) -> (d1, d0)>
+
+// ALL-LABEL: op_matmul_transposed_output
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_TC]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_TC]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// -----
@@ -538,24 +733,37 @@ func.func @op_matmul_transposed_output(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
#map_nc_b = affine_map<(m, k, n) -> (k, n)>
#map_nc_c = affine_map<(m, k, n) -> (m, n)>
func.func @op_matmul_non_canonical_loops(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
- %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_nc_a, #map_nc_b, #map_nc_c],
- iterator_types = ["parallel", "reduction", "parallel"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_nc_a, #map_nc_b, #map_nc_c],
+ iterator_types = ["parallel", "reduction", "parallel"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: op_matmul_non_canonical_loops
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+// CATEGORY-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+
+// ALL-LABEL: op_matmul_non_canonical_loops
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// -----
// Batch matmul with non-canonical loop ordering.
@@ -563,11 +771,12 @@ func.func @op_matmul_non_canonical_loops(%A: tensor<?x?xf32>, %B: tensor<?x?xf32
#map_bnc_b = affine_map<(batch, m, k, n) -> (batch, k, n)>
#map_bnc_c = affine_map<(batch, m, k, n) -> (batch, m, n)>
func.func @op_batch_matmul_non_canonical_loops(%A: tensor<2x16x8xf32>, %B: tensor<2x8x16xf32>,
- %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
+ %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_bnc_a, #map_bnc_b, #map_bnc_c],
- iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
- ins(%A, %B : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x16x16xf32>) {
+ {indexing_maps = [#map_bnc_a, #map_bnc_b, #map_bnc_c],
+ iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
+ ins(%A, %B : tensor<2x16x8xf32>, tensor<2x8x16xf32>)
+ outs(%Out : tensor<2x16x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -576,10 +785,23 @@ func.func @op_batch_matmul_non_canonical_loops(%A: tensor<2x16x8xf32>, %B: tenso
return %0 : tensor<2x16x16xf32>
}
-// CHECK-LABEL: op_batch_matmul_non_canonical_loops
-// CHECK-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[Out:.+]]: tensor<2x16x16xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.batch_matmul ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>) outs(%[[Out]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+// CATEGORY-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+
+// ALL-LABEL: op_batch_matmul_non_canonical_loops
+// ALL-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x8x16xf32>, %[[OUT:.+]]: tensor<2x16x16xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.batch_matmul
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x8x16xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// -----
@@ -588,29 +810,41 @@ func.func @op_batch_matmul_non_canonical_loops(%A: tensor<2x16x8xf32>, %B: tenso
#map_nc_tb_b = affine_map<(d0, d1, d2) -> (d2, d1)>
#map_nc_tb_c = affine_map<(d0, d1, d2) -> (d0, d2)>
func.func @op_matmul_non_canonical_transpose_b(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
- %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_nc_tb_a, #map_nc_tb_b, #map_nc_tb_c],
- iterator_types = ["parallel", "reduction", "parallel"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_nc_tb_a, #map_nc_tb_b, #map_nc_tb_c],
+ iterator_types = ["parallel", "reduction", "parallel"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
-// CHECK-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
-// CHECK-LABEL: op_matmul_non_canonical_transpose_b
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// NAMED-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// NAMED-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
+// NAMED-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+// CATEGORY-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+
+// ALL-LABEL: op_matmul_non_canonical_transpose_b
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// -----
@@ -620,11 +854,12 @@ func.func @op_matmul_non_canonical_transpose_b(%A: tensor<?x?xf32>, %B: tensor<?
#map_bnc_tb_b = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map_bnc_tb_c = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
func.func @op_batch_matmul_non_canonical_transpose_b(%A: tensor<2x16x8xf32>, %B: tensor<2x16x8xf32>,
- %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
+ %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_bnc_tb_a, #map_bnc_tb_b, #map_bnc_tb_c],
- iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
- ins(%A, %B : tensor<2x16x8xf32>, tensor<2x16x8xf32>) outs(%Out : tensor<2x16x16xf32>) {
+ {indexing_maps = [#map_bnc_tb_a, #map_bnc_tb_b, #map_bnc_tb_c],
+ iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
+ ins(%A, %B : tensor<2x16x8xf32>, tensor<2x16x8xf32>)
+ outs(%Out : tensor<2x16x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -633,16 +868,28 @@ func.func @op_batch_matmul_non_canonical_transpose_b(%A: tensor<2x16x8xf32>, %B:
return %0 : tensor<2x16x16xf32>
}
-// CHECK-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
-// CHECK-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
-// CHECK-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-// CHECK-LABEL: op_batch_matmul_non_canonical_transpose_b
-// CHECK-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x16x8xf32>, %[[Out:.+]]: tensor<2x16x16xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.batch_matmul
-// CHECK-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
-// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>)
-// CHECK-SAME: outs(%[[Out]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+// NAMED-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+// NAMED-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
+// NAMED-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
+// CATEGORY-DAG: #[[$MAP_TB:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
+
+// ALL-LABEL: op_batch_matmul_non_canonical_transpose_b
+// ALL-SAME: %[[A:.+]]: tensor<2x16x8xf32>, %[[B:.+]]: tensor<2x16x8xf32>, %[[OUT:.+]]: tensor<2x16x16xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.batch_matmul
+// NAMED-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_TB]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<2x16x8xf32>, tensor<2x16x8xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<2x16x16xf32>) -> tensor<2x16x16xf32>
// -----
@@ -651,23 +898,37 @@ func.func @op_batch_matmul_non_canonical_transpose_b(%A: tensor<2x16x8xf32>, %B:
#map_fs_b = affine_map<(d0, d1, d2) -> (d0, d2)>
#map_fs_c = affine_map<(d0, d1, d2) -> (d1, d2)>
func.func @op_matmul_fully_shuffled_loops(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
- %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_fs_a, #map_fs_b, #map_fs_c],
- iterator_types = ["reduction", "parallel", "parallel"]}
- ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_fs_a, #map_fs_b, #map_fs_c],
+ iterator_types = ["reduction", "parallel", "parallel"]}
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: op_matmul_fully_shuffled_loops
-// CHECK-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[Out:.+]]: tensor<?x?xf32>
-// CHECK-NOT: linalg.generic
-// CHECK: linalg.matmul ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>) outs(%[[Out]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+// CATEGORY-DAG: #[[$MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d1, d0)>
+// CATEGORY-DAG: #[[$MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CATEGORY-DAG: #[[$MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d1, d2)>
+
+// ALL-LABEL: op_matmul_fully_shuffled_loops
+// ALL-SAME: %[[A:.+]]: tensor<?x?xf32>, %[[B:.+]]: tensor<?x?xf32>, %[[OUT:.+]]: tensor<?x?xf32>
+
+// NAMED-NOT: linalg.generic
+// NAMED: linalg.matmul
+// NAMED-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// NAMED-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
+// CATEGORY-SAME: indexing_maps = [#[[$MAP_A]], #[[$MAP_B]], #[[$MAP_C]]]
+// CATEGORY-SAME: ins(%[[A]], %[[B]] : tensor<?x?xf32>, tensor<?x?xf32>)
+// CATEGORY-SAME: outs(%[[OUT]] : tensor<?x?xf32>) -> tensor<?x?xf32>
// -----
@@ -675,23 +936,28 @@ func.func @op_matmul_fully_shuffled_loops(%A: tensor<?x?xf32>, %B: tensor<?x?xf3
#map_bcast_a = affine_map<(d0, d1, d2) -> (d2)>
#map_bcast_b = affine_map<(d0, d1, d2) -> (d2, d1)>
#map_bcast_c = affine_map<(d0, d1, d2) -> (d0, d1)>
-func.func @negative_matmul_broadcast_a(%A: tensor<?xf32>, %B: tensor<?x?xf32>,
- %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
+func.func @op_matmul_broadcast_a(%A: tensor<?xf32>, %B: tensor<?x?xf32>,
+ %Out: tensor<?x?xf32>) -> tensor<?x?xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_bcast_a, #map_bcast_b, #map_bcast_c],
- iterator_types = ["parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<?xf32>, tensor<?x?xf32>) outs(%Out : tensor<?x?xf32>) {
- ^bb0(%in: f32, %in_0: f32, %out: f32):
- %1 = arith.mulf %in, %in_0 : f32
- %2 = arith.addf %out, %1 : f32
- linalg.yield %2 : f32
- } -> tensor<?x?xf32>
- return %0 : tensor<?x?xf32>
+ {indexing_maps = [#map_bcast_a, #map_bcast_b, #map_bcast_c],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<?xf32>, tensor<?x?xf32>)
+ outs(%Out : tensor<?x?xf32>) {
+ ^bb0(%in: f32, %in_0: f32, %out: f32):
+ %1 = arith.mulf %in, %in_0 : f32
+ %2 = arith.addf %out, %1 : f32
+ linalg.yield %2 : f32
+ } -> tensor<?x?xf32>
+ return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: negative_matmul_broadcast_a
-// CHECK: linalg.generic
-// CHECK-NOT: linalg.matmul
+// ALL-LABEL: op_matmul_broadcast_a
+
+// NAMED: linalg.generic
+// NAMED-NOT: linalg.matmul
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
// -----
@@ -699,12 +965,13 @@ func.func @negative_matmul_broadcast_a(%A: tensor<?xf32>, %B: tensor<?x?xf32>,
#map_bbcast_a = affine_map<(d0, d1, d2, d3) -> (d1, d3)>
#map_bbcast_b = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map_bbcast_c = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-func.func @negative_batch_matmul_broadcast_a(%A: tensor<16x8xf32>, %B: tensor<2x8x16xf32>,
- %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
+func.func @op_batch_matmul_broadcast_a(%A: tensor<16x8xf32>, %B: tensor<2x8x16xf32>,
+ %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_bbcast_a, #map_bbcast_b, #map_bbcast_c],
- iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<16x8xf32>, tensor<2x8x16xf32>) outs(%Out : tensor<2x16x16xf32>) {
+ {indexing_maps = [#map_bbcast_a, #map_bbcast_b, #map_bbcast_c],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<16x8xf32>, tensor<2x8x16xf32>)
+ outs(%Out : tensor<2x16x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -713,9 +980,13 @@ func.func @negative_batch_matmul_broadcast_a(%A: tensor<16x8xf32>, %B: tensor<2x
return %0 : tensor<2x16x16xf32>
}
-// CHECK-LABEL: negative_batch_matmul_broadcast_a
-// CHECK: linalg.generic
-// CHECK-NOT: linalg.batch_matmul
+// ALL-LABEL: op_batch_matmul_broadcast_a
+
+// NAMED: linalg.generic
+// NAMED-NOT: linalg.batch_matmul
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
// -----
@@ -723,12 +994,13 @@ func.func @negative_batch_matmul_broadcast_a(%A: tensor<16x8xf32>, %B: tensor<2x
#map_bbcast2_a = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map_bbcast2_b = affine_map<(d0, d1, d2, d3) -> (d3)>
#map_bbcast2_c = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
-func.func @negative_batch_matmul_broadcast_b(%A: tensor<2x16x8xf32>, %B: tensor<8xf32>,
- %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
+func.func @op_batch_matmul_broadcast_b(%A: tensor<2x16x8xf32>, %B: tensor<8xf32>,
+ %Out: tensor<2x16x16xf32>) -> tensor<2x16x16xf32> {
%0 = linalg.generic
- {indexing_maps = [#map_bbcast2_a, #map_bbcast2_b, #map_bbcast2_c],
- iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
- ins(%A, %B : tensor<2x16x8xf32>, tensor<8xf32>) outs(%Out : tensor<2x16x16xf32>) {
+ {indexing_maps = [#map_bbcast2_a, #map_bbcast2_b, #map_bbcast2_c],
+ iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
+ ins(%A, %B : tensor<2x16x8xf32>, tensor<8xf32>)
+ outs(%Out : tensor<2x16x16xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%1 = arith.mulf %in, %in_0 : f32
%2 = arith.addf %out, %1 : f32
@@ -737,6 +1009,10 @@ func.func @negative_batch_matmul_broadcast_b(%A: tensor<2x16x8xf32>, %B: tensor<
return %0 : tensor<2x16x16xf32>
}
-// CHECK-LABEL: negative_batch_matmul_broadcast_b
-// CHECK: linalg.generic
-// CHECK-NOT: linalg.batch_matmul
+// ALL-LABEL: op_batch_matmul_broadcast_b
+
+// NAMED: linalg.generic
+// NAMED-NOT: linalg.batch_matmul
+
+// CATEGORY-NOT: linalg.generic
+// CATEGORY: linalg.contract
More information about the Mlir-commits
mailing list