[llvm-branch-commits] [mlir] [mlir][transform] Drop redundant padding_dimensions spec from pad_tiling_interface (PR #145257)
Nicolas Vasilache via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jun 22 21:07:38 PDT 2025
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/145257
From 2f1558ae8c1c90a6091dbc821fd5438f5136b8ae Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nico.vasilache at amd.com>
Date: Wed, 18 Jun 2025 19:14:31 +0200
Subject: [PATCH 1/2] [mlir][transform] Plumb a simplified form of AffineMin
folding into transform.pad-tiling-interface
This revision introduces a simple variant of AffineMin folding in makeComposedFoldedAffineApply
and makes use of it in transform.pad-tiling-interface.
Since this version explicitly calls into ValueBoundsOpInterface, it may be too expensive and is
therefore only activated behind a flag.
It results in better foldings when mixing tiling and padding, including with dynamic shapes.
This should be further composed with #145068 to provide full simplification and address
the remaining TODO in the test.
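
For illustration, a sketch of the folding this enables, with hypothetical SSA names (%i, %ub, %min, %sz) and maps in the spirit of the new test below:

  // %i and %ub stand for a tile induction variable and a dynamic upper bound.
  %min = affine.min affine_map<(d0)[s0] -> (-d0 + s0, 16)>(%i)[%ub]
  // With composeAffineMin = true, makeComposedFoldedAffineApply composes %min
  // into the apply and, once %min is proven nonnegative via
  // ValueBoundsConstraintSet, rewrites (%min ceildiv 16) * 16 to the constant 16.
  %sz = affine.apply affine_map<()[s0] -> ((s0 ceildiv 16) * 16)>()[%min]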
---
.../mlir/Dialect/Affine/IR/AffineOps.h | 18 ++-
.../mlir/Interfaces/ValueBoundsOpInterface.h | 2 +-
mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 134 ++++++++++++++----
.../Linalg/Transforms/PadTilingInterface.cpp | 5 +-
.../lib/Interfaces/ValueBoundsOpInterface.cpp | 2 +-
...m-op-pad-tiling-interface-multiple-of.mlir | 131 +++++++++++++++++
6 files changed, 251 insertions(+), 41 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index 6fdb72c370e6d..2091faa6b0b02 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -410,9 +410,11 @@ void canonicalizeSetAndOperands(IntegerSet *set,
/// other AffineApplyOps supplying those operands. The operands of the resulting
/// AffineApplyOp do not change the length of AffineApplyOp chains.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
- ArrayRef<OpFoldResult> operands);
+ ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin = false);
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
- ArrayRef<OpFoldResult> operands);
+ ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin = false);
/// Constructs an AffineApplyOp that applies `map` to `operands` after composing
/// the map with the maps of any other AffineApplyOp supplying the operands,
@@ -421,16 +423,19 @@ AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
/// map.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
AffineMap map,
- ArrayRef<OpFoldResult> operands);
+ ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin = false);
/// Variant of `makeComposedFoldedAffineApply` that applies to an expression.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
AffineExpr expr,
- ArrayRef<OpFoldResult> operands);
+ ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin = false);
/// Variant of `makeComposedFoldedAffineApply` suitable for multi-result maps.
/// Note that this may create as many affine.apply operations as the map has
/// results given that affine.apply must be single-result.
SmallVector<OpFoldResult> makeComposedFoldedMultiResultAffineApply(
- OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands);
+ OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin = false);
/// Returns an AffineMinOp obtained by composing `map` and `operands` with
/// AffineApplyOps supplying those operands.
@@ -459,7 +464,8 @@ OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc,
/// terminal symbol, i.e., a symbol defined at the top level or a block/function
/// argument.
void fullyComposeAffineMapAndOperands(AffineMap *map,
- SmallVectorImpl<Value> *operands);
+ SmallVectorImpl<Value> *operands,
+ bool composeAffineMin = false);
} // namespace affine
} // namespace mlir
diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
index 337314143c80c..523df173093fa 100644
--- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
+++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h
@@ -135,7 +135,7 @@ class ValueBoundsConstraintSet
/// Construct a variable for a map and its operands.
Variable(AffineMap map, ArrayRef<Variable> mapOperands);
- Variable(AffineMap map, ArrayRef<Value> mapOperands);
+ Variable(AffineMap map, ValueRange mapOperands);
MLIRContext *getContext() const { return map.getContext(); }
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 3d09c6a9b2c24..06b7910736727 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -11,12 +11,14 @@
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/UB/IR/UBOps.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/Value.h"
#include "mlir/Interfaces/ShapedOpInterfaces.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "mlir/Transforms/InliningUtils.h"
@@ -26,7 +28,9 @@
#include "llvm/ADT/SmallVectorExtras.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/MathExtras.h"
+#include <limits>
#include <numeric>
#include <optional>
@@ -1042,6 +1046,59 @@ simplifyMapWithOperands(AffineMap &map, ArrayRef<Value> operands) {
map.getContext());
}
+/// Assuming `dimOrSym` is a quantity in `map` that is defined by `minOp`,
+/// replaces the patterns:
+/// ```
+/// dimOrSym.ceildiv(cst) * cst
+/// (dimOrSym + cst - 1).floordiv(cst) * cst
+/// ```
+/// by `cst` in `map`.
+/// This simplification is valid iff `minOp` is guaranteed to be nonnegative.
+/// Additionally, the caller may pass `affineMinKnownToBeNonNegative` to
+/// inject static nonnegativity information that is not otherwise discoverable.
+/// Warning: ValueBoundsConstraintSet::computeConstantBound is needed to check
+/// for the nonnegative case, if `affineMinKnownToBeNonNegative` is false.
+static LogicalResult replaceAffineMinBoundingBoxExpression(
+ AffineMinOp minOp, AffineExpr dimOrSym, AffineMap *map,
+ bool affineMinKnownToBeNonNegative = false) {
+ auto affineMinMap = minOp.getAffineMap();
+ if (!affineMinKnownToBeNonNegative) {
+ ValueRange values = minOp->getOperands();
+ for (unsigned i = 0, e = affineMinMap.getNumResults(); i < e; ++i) {
+ AffineMap row = affineMinMap.getSubMap(ArrayRef<unsigned>{i});
+ FailureOr<int64_t> lowerBound =
+ ValueBoundsConstraintSet::computeConstantBound(
+ presburger::BoundType::LB, {row, values},
+ /*stopCondition=*/nullptr,
+ /*closedUB=*/true);
+ if (failed(lowerBound) || lowerBound.value() < 0)
+ return failure();
+ }
+ }
+
+ AffineMap initialMap = *map;
+ for (unsigned i = 0, e = affineMinMap.getNumResults(); i != e; ++i) {
+ auto m = affineMinMap.getSubMap(ArrayRef<unsigned>{i});
+ // TODO: this should also work with nonnegative symbolic divisors.
+ if (!m.isSingleConstant())
+ continue;
+
+ auto cst = m.getSingleConstantResult();
+ DenseMap<AffineExpr, AffineExpr> repl;
+ // dimOrSym.ceilDiv(cst) * cst -> cst
+ repl[dimOrSym.ceilDiv(cst) * cst] =
+ getAffineConstantExpr(cst, minOp.getContext());
+ // (dimOrSym + cst - 1).floorDiv(cst) * cst -> cst
+ repl[(dimOrSym + cst - 1).floorDiv(cst) * cst] =
+ getAffineConstantExpr(cst, minOp.getContext());
+ auto newMap = map->replace(repl);
+ if (newMap == *map)
+ continue;
+ *map = newMap;
+ }
+ return success(*map != initialMap);
+}
+
/// Replace all occurrences of AffineExpr at position `pos` in `map` by the
/// defining AffineApplyOp expression and operands.
/// When `dimOrSymbolPosition < dims.size()`, AffineDimExpr@[pos] is replaced.
@@ -1052,10 +1109,13 @@ simplifyMapWithOperands(AffineMap &map, ArrayRef<Value> operands) {
/// 2. `map` dim and symbols are gradually shifted to higher positions.
/// 3. Old `dim` and `sym` entries are replaced by nullptr
/// This avoids the need for any bookkeeping.
+/// If `replaceAffineMin` is set to true, additionally triggers more expensive
+/// replacements involving affine_min operations.
static LogicalResult replaceDimOrSym(AffineMap *map,
unsigned dimOrSymbolPosition,
SmallVectorImpl<Value> &dims,
- SmallVectorImpl<Value> &syms) {
+ SmallVectorImpl<Value> &syms,
+ bool replaceAffineMin) {
MLIRContext *ctx = map->getContext();
bool isDimReplacement = (dimOrSymbolPosition < dims.size());
unsigned pos = isDimReplacement ? dimOrSymbolPosition
@@ -1064,6 +1124,13 @@ static LogicalResult replaceDimOrSym(AffineMap *map,
if (!v)
return failure();
+ auto minOp = v.getDefiningOp<AffineMinOp>();
+ if (minOp && replaceAffineMin) {
+ AffineExpr dimOrSym = isDimReplacement ? getAffineDimExpr(pos, ctx)
+ : getAffineSymbolExpr(pos, ctx);
+ return replaceAffineMinBoundingBoxExpression(minOp, dimOrSym, map);
+ }
+
auto affineApply = v.getDefiningOp<AffineApplyOp>();
if (!affineApply)
return failure();
@@ -1101,7 +1168,8 @@ static LogicalResult replaceDimOrSym(AffineMap *map,
/// iteratively. Perform canonicalization of map and operands as well as
/// AffineMap simplification. `map` and `operands` are mutated in place.
static void composeAffineMapAndOperands(AffineMap *map,
- SmallVectorImpl<Value> *operands) {
+ SmallVectorImpl<Value> *operands,
+ bool composeAffineMin = false) {
if (map->getNumResults() == 0) {
canonicalizeMapAndOperands(map, operands);
*map = simplifyAffineMap(*map);
@@ -1122,7 +1190,8 @@ static void composeAffineMapAndOperands(AffineMap *map,
while (true) {
bool changed = false;
for (unsigned pos = 0; pos != dims.size() + syms.size(); ++pos)
- if ((changed |= succeeded(replaceDimOrSym(map, pos, dims, syms))))
+ if ((changed |=
+ succeeded(replaceDimOrSym(map, pos, dims, syms, composeAffineMin))))
break;
if (!changed)
break;
@@ -1163,38 +1232,41 @@ static void composeAffineMapAndOperands(AffineMap *map,
}
void mlir::affine::fullyComposeAffineMapAndOperands(
- AffineMap *map, SmallVectorImpl<Value> *operands) {
+ AffineMap *map, SmallVectorImpl<Value> *operands, bool composeAffineMin) {
while (llvm::any_of(*operands, [](Value v) {
return isa_and_nonnull<AffineApplyOp>(v.getDefiningOp());
})) {
- composeAffineMapAndOperands(map, operands);
+ composeAffineMapAndOperands(map, operands, composeAffineMin);
}
}
AffineApplyOp
mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map,
- ArrayRef<OpFoldResult> operands) {
+ ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin) {
SmallVector<Value> valueOperands;
map = foldAttributesIntoMap(b, map, operands, valueOperands);
- composeAffineMapAndOperands(&map, &valueOperands);
+ composeAffineMapAndOperands(&map, &valueOperands, composeAffineMin);
assert(map);
return b.create<AffineApplyOp>(loc, map, valueOperands);
}
AffineApplyOp
mlir::affine::makeComposedAffineApply(OpBuilder &b, Location loc, AffineExpr e,
- ArrayRef<OpFoldResult> operands) {
+ ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin) {
return makeComposedAffineApply(
b, loc,
AffineMap::inferFromExprList(ArrayRef<AffineExpr>{e}, b.getContext())
.front(),
- operands);
+ operands, composeAffineMin);
}
/// Composes the given affine map with the given list of operands, pulling in
/// the maps from any affine.apply operations that supply the operands.
static void composeMultiResultAffineMap(AffineMap &map,
- SmallVectorImpl<Value> &operands) {
+ SmallVectorImpl<Value> &operands,
+ bool composeAffineMin = false) {
// Compose and canonicalize each expression in the map individually because
// composition only applies to single-result maps, collecting potentially
// duplicate operands in a single list with shifted dimensions and symbols.
@@ -1203,7 +1275,8 @@ static void composeMultiResultAffineMap(AffineMap &map,
for (unsigned i : llvm::seq<unsigned>(0, map.getNumResults())) {
SmallVector<Value> submapOperands(operands.begin(), operands.end());
AffineMap submap = map.getSubMap({i});
- fullyComposeAffineMapAndOperands(&submap, &submapOperands);
+ fullyComposeAffineMapAndOperands(&submap, &submapOperands,
+ composeAffineMin);
canonicalizeMapAndOperands(&submap, &submapOperands);
unsigned numNewDims = submap.getNumDims();
submap = submap.shiftDims(dims.size()).shiftSymbols(symbols.size());
@@ -1221,10 +1294,9 @@ static void composeMultiResultAffineMap(AffineMap &map,
canonicalizeMapAndOperands(&map, &operands);
}
-OpFoldResult
-mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
- AffineMap map,
- ArrayRef<OpFoldResult> operands) {
+OpFoldResult mlir::affine::makeComposedFoldedAffineApply(
+ OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin) {
assert(map.getNumResults() == 1 && "building affine.apply with !=1 result");
// Create new builder without a listener, so that no notification is
@@ -1236,7 +1308,7 @@ mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
// Create op.
AffineApplyOp applyOp =
- makeComposedAffineApply(newBuilder, loc, map, operands);
+ makeComposedAffineApply(newBuilder, loc, map, operands, composeAffineMin);
// Get constant operands.
SmallVector<Attribute> constOperands(applyOp->getNumOperands());
@@ -1256,26 +1328,25 @@ mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
return llvm::getSingleElement(foldResults);
}
-OpFoldResult
-mlir::affine::makeComposedFoldedAffineApply(OpBuilder &b, Location loc,
- AffineExpr expr,
- ArrayRef<OpFoldResult> operands) {
+OpFoldResult mlir::affine::makeComposedFoldedAffineApply(
+ OpBuilder &b, Location loc, AffineExpr expr,
+ ArrayRef<OpFoldResult> operands, bool composeAffineMin) {
return makeComposedFoldedAffineApply(
b, loc,
AffineMap::inferFromExprList(ArrayRef<AffineExpr>{expr}, b.getContext())
.front(),
- operands);
+ operands, composeAffineMin);
}
SmallVector<OpFoldResult>
mlir::affine::makeComposedFoldedMultiResultAffineApply(
- OpBuilder &b, Location loc, AffineMap map,
- ArrayRef<OpFoldResult> operands) {
- return llvm::map_to_vector(llvm::seq<unsigned>(0, map.getNumResults()),
- [&](unsigned i) {
- return makeComposedFoldedAffineApply(
- b, loc, map.getSubMap({i}), operands);
- });
+ OpBuilder &b, Location loc, AffineMap map, ArrayRef<OpFoldResult> operands,
+ bool composeAffineMin) {
+ return llvm::map_to_vector(
+ llvm::seq<unsigned>(0, map.getNumResults()), [&](unsigned i) {
+ return makeComposedFoldedAffineApply(b, loc, map.getSubMap({i}),
+ operands, composeAffineMin);
+ });
}
template <typename OpTy>
@@ -3024,7 +3095,8 @@ void AffineIfOp::build(OpBuilder &builder, OperationState &result,
/// `set` by composing the maps of such affine.apply ops with the integer
/// set constraints.
static void composeSetAndOperands(IntegerSet &set,
- SmallVectorImpl<Value> &operands) {
+ SmallVectorImpl<Value> &operands,
+ bool composeAffineMin) {
// We will simply reuse the API of the map composition by viewing the LHSs of
// the equalities and inequalities of `set` as the affine exprs of an affine
// map. Convert to equivalent map, compose, and convert back to set.
@@ -3035,7 +3107,7 @@ static void composeSetAndOperands(IntegerSet &set,
[](Value v) { return v.getDefiningOp<AffineApplyOp>(); }))
return;
- composeAffineMapAndOperands(&map, &operands);
+ composeAffineMapAndOperands(&map, &operands, composeAffineMin);
set = IntegerSet::get(map.getNumDims(), map.getNumSymbols(), map.getResults(),
set.getEqFlags());
}
@@ -3044,7 +3116,7 @@ static void composeSetAndOperands(IntegerSet &set,
LogicalResult AffineIfOp::fold(FoldAdaptor, SmallVectorImpl<OpFoldResult> &) {
auto set = getIntegerSet();
SmallVector<Value, 4> operands(getOperands());
- composeSetAndOperands(set, operands);
+ composeSetAndOperands(set, operands, /*composeAffineMin=*/false);
canonicalizeSetAndOperands(&set, &operands);
// Check if the canonicalization or composition led to any change.
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
index 5383ae48aeb3a..42dac0776bace 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
@@ -84,7 +84,7 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
getDimsToSize(rewriter, indexingSizes, options);
// For each dimension in the operand's shape, iterate over indexingSizes and
- // add
+ // add the various term contributions.
for (const auto &enResults : enumerate(indexingMap.getResults())) {
int64_t resultIndex = enResults.index();
AffineMap partialIndexingMap = indexingMap.getSubMap(
@@ -122,7 +122,8 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
AffineMap composedMap = projectedMap.compose(ceilMap);
OpFoldResult paddingDimOfr = affine::makeComposedFoldedAffineApply(
rewriter, loc, composedMap,
- {indexingSizes[paddingDim], paddingSize});
+ {indexingSizes[paddingDim], paddingSize},
+ /*composeAffineMin=*/true);
terms.push_back(paddingDimOfr);
} else {
// Otherwise just set to paddingSize.
diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
index 87f883c2e6485..d858ab3a6406a 100644
--- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
+++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp
@@ -146,7 +146,7 @@ ValueBoundsConstraintSet::Variable::Variable(AffineMap map,
}
ValueBoundsConstraintSet::Variable::Variable(AffineMap map,
- ArrayRef<Value> mapOperands)
+ ValueRange mapOperands)
: Variable(map, llvm::map_to_vector(mapOperands,
[](Value v) { return Variable(v); })) {}
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir
index 5ac35c14be3fb..4fcbcbb2a18e3 100644
--- a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir
@@ -136,3 +136,134 @@ module {
}
}
}
+
+// -----
+
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<()[s0, s1] -> (-s1 + (s0 ceildiv 16) * 16)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)>
+
+// CHECK-LABEL: pad_lhs
+func.func @pad_lhs(
+ %arg0: tensor<24x?xf32>, %arg1: tensor<?x25xf32>, %arg2: tensor<24x25xf32>)
+ -> tensor<24x25xf32>
+{
+ // CHECK: %[[D0_0:.*]] = tensor.dim
+ // CHECK: %[[D0_1:.*]] = tensor.dim
+ // CHECK: %[[H0:.*]] = affine.apply #[[$MAP0]]()[%[[D0_0]], %[[D0_1]]]
+ // CHECK: tensor.pad %{{.*}} low[0, 0] high[0, %[[H0]]]
+ // CHECK: : tensor<24x?xf32> to tensor<24x?xf32>
+
+ // CHECK: %[[D0_2:.*]] = tensor.dim
+ // CHECK: %[[H1:.*]] = affine.apply #[[$MAP0]]()[%[[D0_0]], %[[D0_2]]]
+ // CHECK: tensor.pad %{{.*}} low[0, 0] high[%[[H1]], 0]
+ // CHECK: : tensor<?x25xf32> to tensor<?x25xf32>
+ // CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>)
+
+ // TODO: Not yet simplified enough..
+ // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}}: tensor<8x?xf32>, tensor<?x25xf32>) outs(%extracted_slice_5 : tensor<8x25xf32>) -> tensor<8x25xf32>
+
+ // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0] [8, 25] [1, 1]
+ // CHECK-SAME: : tensor<8x25xf32> into tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x?xf32>, tensor<?x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ func.return %0 : tensor<24x25xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+
+ // Pad then tile should produce static shapes.
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul to padding_sizes [8, 16] pad_to_multiple_of {
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
+ padding_dimensions=[0, 2]
+ } : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+
+ %m, %l0, %l1 = transform.structured.tile_using_for %matmul_padded tile_sizes [8, 0, 16]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (-d0 + 16)>
+
+// CHECK-LABEL: pad_lhs
+func.func @pad_lhs(
+ %arg0: tensor<24x?xf32>, %arg1: tensor<?x25xf32>, %arg2: tensor<24x25xf32>)
+ -> tensor<24x25xf32>
+{
+ // CHECK: scf.for %{{.*}} -> (tensor<24x25xf32>)
+ // CHECK: %[[MIN:.*]] = affine.min #[[$MAP0]](%{{.*}})
+ // CHECK: %[[H0:.*]] = affine.apply #[[$MAP1]](%[[MIN]])
+ // CHECK: tensor.pad %{{.*}} low[0, 0] high[0, %[[H0]]]
+ // CHECK: : tensor<8x?xf32> to tensor<8x16xf32>
+
+ // CHECK: %[[H1:.*]] = affine.apply #[[$MAP1]](%[[MIN]])
+ // CHECK: tensor.pad %{{.*}} low[0, 0] high[%[[H1]], 0]
+ // CHECK: : tensor<?x25xf32> to tensor<16x25xf32>
+
+ // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<8x16xf32>, tensor<16x25xf32>) outs(%{{.*}} : tensor<8x25xf32>) -> tensor<8x25xf32>
+
+ // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, 0] [8, 25] [1, 1]
+ // CHECK-SAME: : tensor<8x25xf32> into tensor<24x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x?xf32>, tensor<?x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+ func.return %0 : tensor<24x25xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+
+ // Tile then pad should produce static shapes.
+ %m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of {
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
+ padding_dimensions=[0, 2]
+ } : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> (-d0 + 20, 8)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 16)>
+// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0) -> (-d0 + 8)>
+// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0) -> (-d0 + 16)>
+
+// CHECK-LABEL: pad_lhs
+func.func @pad_lhs(
+ %arg0: tensor<20x?xf32>, %arg1: tensor<?x25xf32>, %arg2: tensor<20x25xf32>)
+ -> tensor<20x25xf32>
+{
+ // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<8x16xf32>, tensor<16x25xf32>) outs(%{{.*}} : tensor<8x25xf32>) -> tensor<8x25xf32>
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<20x?xf32>, tensor<?x25xf32>) outs(%arg2 : tensor<20x25xf32>) -> tensor<20x25xf32>
+ func.return %0 : tensor<20x25xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+
+ // Tile then pad should produce static shapes.
+ %m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of {
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
+ padding_dimensions=[0, 2]
+ } : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+
+ transform.yield
+ }
+}
+
From 512be3adf3bda6baeb41da93a94fd235c1eb39b9 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nico.vasilache at amd.com>
Date: Sun, 22 Jun 2025 14:24:48 +0200
Subject: [PATCH 2/2] [mlir][transform] Drop redundant padding_dimensions spec
from pad_tiling_interface
This revision aligns the padding specification of pad_tiling_interface with that of the tiling specification.
Dimensions that should be skipped are specified by "padding by 0".
Trailing dimensions that are omitted are automatically completed to "pad by 0" (i.e. left unpadded).
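
For illustration, a before/after sketch distilled from the test updates in this patch; %m is a hypothetical handle to a tiled linalg.matmul:

  // Before: an explicit padding_dimensions attribute selected the padded iterators.
  %padded, %pad = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of {
    padding_values = [0.0 : f32, 0.0 : f32, 0.0 : f32],
    padding_dimensions = [0, 2]
  } : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

  // After: a "0" entry skips an iterator; omitted trailing entries default to 0.
  %padded, %pad = transform.structured.pad_tiling_interface %m to padding_sizes [8, 0, 16] pad_to_multiple_of {
    padding_values = [0.0 : f32, 0.0 : f32, 0.0 : f32]
  } : (!transform.any_op) -> (!transform.any_op, !transform.any_op)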
---
.../Linalg/TransformOps/LinalgTransformOps.td | 25 +++++++----
.../TransformOps/LinalgTransformOps.cpp | 21 +--------
.../Linalg/Transforms/PadTilingInterface.cpp | 45 +++++++++----------
...m-op-pad-tiling-interface-multiple-of.mlir | 28 +++++-------
.../transform-op-pad-tiling-interface.mlir | 12 ++---
5 files changed, 54 insertions(+), 77 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index cf3f2b70580da..c5650470fdc8d 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -1195,17 +1195,29 @@ def PadTilingInterfaceOp : Op<Transform_Dialect, "structured.pad_tiling_interfac
TransformOpInterface,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Pads the operations pointed to by the target handle using the options
- provided as operation attributes. The operation returns a handle to the
- padded operation and to the padding operation ("tensor.pad").
+ Pads the **iteration domain** of the operations pointed to by the target
+ handle using the options provided as operation attributes. Padding the
+ iteration domain induces a padding of the operands that is consistent
+ across the op semantics and, unlike for simple elementwise ops, may not be
+ trivially deducible or specifiable on operands only (e.g. convolutions).
+
+    The specification of `padding_sizes` follows that of `tile_sizes` during
+    tiling: a value of "0" for a particular iterator encodes "no padding". As
+    with tiling, the specification is automatically completed with 0 up to the
+    operation's rank.
+
+ This transformation returns a handle to the padded operation and to the
+ padding operation ("tensor.pad").
TODO: in the future this should be moved out of a specific Linalg
implementation file and into a more general "Structured" file.
#### Return modes
- This operation ignores non-Linalg ops and drops them in the return.
- In the future, this operation will support all TilingInterfaceOps.
+ This operation ignores non-IndexingMapOpInterface ops and drops them in the
+ return. In the future, this operation will support all TilingInterfaceOps
+ for which the contract between iteration domain and operands can be
+ reified.
This operation may produce a definite failure if the padding fails for any
reason.
@@ -1219,7 +1231,6 @@ def PadTilingInterfaceOp : Op<Transform_Dialect, "structured.pad_tiling_interfac
let arguments =
(ins TransformHandleTypeInterface:$target,
DefaultValuedAttr<ArrayAttr, "{}">:$padding_values,
- DefaultValuedAttr<I64ArrayAttr, "{}">:$padding_dimensions,
Variadic<TransformAnyParamTypeOrAnyHandle>:$padding_sizes,
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:
$static_padding_sizes,
@@ -1245,11 +1256,9 @@ def PadTilingInterfaceOp : Op<Transform_Dialect, "structured.pad_tiling_interfac
// add/mul ring at the moment.
// TODO: support other operations (e.g. min, max etc).
OpBuilder<(ins "Value":$target,
- "ArrayRef<int64_t>":$paddingDimensions,
CArg<"ArrayRef<int64_t>", "{}">:$staticPaddingSizes,
CArg<"bool", "false">:$padToMultipleOf)>,
OpBuilder<(ins "Value":$target,
- "ArrayRef<int64_t>":$paddingDimensions,
"ArrayRef<OpFoldResult>":$mixedPadPaddingSizes,
CArg<"bool", "false">:$usePrescribedTensorShapes)>
];
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 5d55adbf46f36..d9a0ba02f4fe4 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -2163,7 +2163,6 @@ LogicalResult transform::PadOp::verify() {
void transform::PadTilingInterfaceOp::build(OpBuilder &b,
OperationState &result,
Value target,
- ArrayRef<int64_t> paddingDimensions,
ArrayRef<int64_t> paddingSizes,
bool padToMultipleOf) {
auto resultType = transform::AnyOpType::get(b.getContext());
@@ -2172,7 +2171,6 @@ void transform::PadTilingInterfaceOp::build(OpBuilder &b,
/*types=*/TypeRange{resultType, resultType},
/*target=*/target,
/*paddingValues=*/ArrayAttr(), // let inference handle this
- /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions),
/*paddingSizes=*/ValueRange{},
/*paddingSizes=*/
(paddingSizes.empty() ? DenseI64ArrayAttr()
@@ -2183,7 +2181,6 @@ void transform::PadTilingInterfaceOp::build(OpBuilder &b,
void transform::PadTilingInterfaceOp::build(
OpBuilder &b, OperationState &result, Value target,
- ArrayRef<int64_t> paddingDimensions,
ArrayRef<OpFoldResult> mixedPaddingSizes, bool padToMultipleOf) {
auto resultType = transform::AnyOpType::get(b.getContext());
SmallVector<int64_t> staticPaddingSizes;
@@ -2195,7 +2192,6 @@ void transform::PadTilingInterfaceOp::build(
/*types=*/TypeRange{resultType, resultType},
/*target=*/target,
/*paddingValues=*/ArrayAttr(), // let inference handle this
- /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions),
/*paddingSizes=*/dynamicPaddingSizes,
/*paddingSizes=*/staticPaddingSizes,
/*usePrescribedTensorShapes=*/padToMultipleOf);
@@ -2277,8 +2273,6 @@ transform::PadTilingInterfaceOp::apply(transform::TransformRewriter &rewriter,
TilingInterface paddedOp;
PadTilingInterfaceOptions options;
options.setPaddingValues(paddingValues)
- .setPaddingDimensions(
- extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions()))
.setPaddingSizes(getMixedPaddingSizes())
.setPadToMultipleOf(getPadToMultipleOf());
@@ -2303,20 +2297,7 @@ transform::PadTilingInterfaceOp::apply(transform::TransformRewriter &rewriter,
return DiagnosedSilenceableFailure::success();
}
-LogicalResult transform::PadTilingInterfaceOp::verify() {
- SmallVector<int64_t> paddingDimensions =
- extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions());
- if (any_of(paddingDimensions,
- [](int64_t paddingDimension) { return paddingDimension < 0; })) {
- return emitOpError() << "expects padding_dimensions to contain positive "
- "integers, found "
- << getPaddingDimensions();
- }
- if (getMixedPaddingSizes().size() != paddingDimensions.size()) {
- return emitOpError() << "expects as many multiples as padding_dimensions";
- }
- return success();
-}
+LogicalResult transform::PadTilingInterfaceOp::verify() { return success(); }
//===---------------------------------------------------------------------===//
// HoistPadOp
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
index 42dac0776bace..eda3373b4d639 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
@@ -32,29 +32,27 @@ using namespace mlir::tensor;
#define DBGSNL() (llvm::dbgs() << "\n")
/// Form a "full-rank" padding specification so that the application is easy.
-static llvm::SmallDenseMap<int64_t, OpFoldResult>
-getDimsToSize(Builder &b, ArrayRef<OpFoldResult> indexingSizes,
- const PadTilingInterfaceOptions &options) {
- llvm::SmallDenseMap<int64_t, OpFoldResult> dimsToSize;
- for (const auto &[paddingDim, paddingSize] :
- llvm::zip_equal(options.paddingDimensions, options.paddingSizes)) {
- dimsToSize[paddingDim] = paddingSize;
- }
+static SmallVector<OpFoldResult>
+getFullRankPaddingSizes(Builder &b, ArrayRef<OpFoldResult> indexingSizes,
+ const PadTilingInterfaceOptions &options) {
+ SmallVector<OpFoldResult> paddingSizes;
// Complete the padding specification to specify all dimensions.
- for (int64_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) {
- if (dimsToSize.find(idx) != dimsToSize.end())
- continue;
- // If a dimension is not specified, either complete with:
+ for (size_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) {
+ // Complete to zero if needed.
+ paddingSizes.push_back(options.paddingSizes.size() > idx
+ ? options.paddingSizes[idx]
+ : b.getIndexAttr(0));
+ // If a dimension is zero (either specified or completed), replace by:
// - 1 if we are padding to the next multiple of.
// - indexingSizes[idx] otherwise
- dimsToSize[idx] =
- options.padToMultipleOf ? b.getIndexAttr(1) : indexingSizes[idx];
- }
- for (int64_t idx = 0, e = indexingSizes.size(); idx != e; ++idx) {
- LLVM_DEBUG(DBGS() << "----idx: " << idx << " : " << dimsToSize[idx]
+ if (isZeroInteger(paddingSizes[idx])) {
+ paddingSizes[idx] =
+ options.padToMultipleOf ? b.getIndexAttr(1) : indexingSizes[idx];
+ }
+ LLVM_DEBUG(DBGS() << "----idx: " << idx << " : " << paddingSizes[idx]
<< "\n");
}
- return dimsToSize;
+ return paddingSizes;
}
/// Compute the padded shape of the given value `v` of `RankedTensorType` given
@@ -80,8 +78,8 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
"rank");
// "Full-rank" padding specification.
- llvm::SmallDenseMap<int64_t, OpFoldResult> dimsToSize =
- getDimsToSize(rewriter, indexingSizes, options);
+ SmallVector<OpFoldResult> paddingSizes =
+ getFullRankPaddingSizes(rewriter, indexingSizes, options);
// For each dimension in the operand's shape, iterate over indexingSizes and
// add the various term contributions.
@@ -97,7 +95,9 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
// Find all padding dimensions that contribute to this operand dimension
// and compute the padded term contribution to the final padded shape.
SmallVector<OpFoldResult> terms;
- for (const auto &[paddingDim, paddingSize] : dimsToSize) {
+ for (size_t paddingDim = 0, e = paddingSizes.size(); paddingDim != e;
+ ++paddingDim) {
+ OpFoldResult paddingSize = paddingSizes[paddingDim];
LLVM_DEBUG(DBGS() << "------try apply padding of dim: " << paddingDim
<< " to: " << paddingSize << "\n");
if (!enResults.value().isFunctionOfDim(paddingDim))
@@ -224,9 +224,6 @@ linalg::rewriteAsPaddedOp(RewriterBase &rewriter, TilingInterface opToPad,
SmallVector<tensor::PadOp> &padOps,
PadSizeComputationFunction computePaddingSizeFun) {
LLVM_DEBUG(DBGS() << "Start rewriteAsPaddedOp : " << opToPad << "\n");
- assert(constOptions.paddingSizes.size() ==
- constOptions.paddingDimensions.size() &&
- "invalid number of elements in padToMultipleOf");
Location loc = opToPad.getLoc();
PadTilingInterfaceOptions options(constOptions);
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir
index 4fcbcbb2a18e3..2bba309953570 100644
--- a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface-multiple-of.mlir
@@ -36,8 +36,7 @@ module attributes {transform.with_named_sequence} {
// Tile to 5 then pad to 8 (supposedly to better hit vector ops).
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul tile_sizes [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul_l1 to padding_sizes [8] pad_to_multiple_of {
- padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
- padding_dimensions=[0]
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
@@ -73,9 +72,8 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 5] pad_to_multiple_of {
- padding_dimensions = [0, 2],
- padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]
+ %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 0, 5] pad_to_multiple_of {
+ padding_values = [0.0 : f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -128,9 +126,8 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 5] pad_to_multiple_of {
- padding_dimensions = [0, 2],
- padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]
+ %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [3, 0, 5] pad_to_multiple_of {
+ padding_values = [0.0 : f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -174,9 +171,8 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> !transform.any_op
// Pad then tile should produce static shapes.
- %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul to padding_sizes [8, 16] pad_to_multiple_of {
- padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
- padding_dimensions=[0, 2]
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul to padding_sizes [8, 0, 16] pad_to_multiple_of {
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%m, %l0, %l1 = transform.structured.tile_using_for %matmul_padded tile_sizes [8, 0, 16]
@@ -223,9 +219,8 @@ module attributes {transform.with_named_sequence} {
%m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of {
- padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
- padding_dimensions=[0, 2]
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 0, 16] pad_to_multiple_of {
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
@@ -258,9 +253,8 @@ module attributes {transform.with_named_sequence} {
%m, %l0, %l1 = transform.structured.tile_using_for %matmul tile_sizes [8, 0, 16]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 16] pad_to_multiple_of {
- padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
- padding_dimensions=[0, 2]
+ %matmul_padded, %_ = transform.structured.pad_tiling_interface %m to padding_sizes [8, 0, 16] pad_to_multiple_of {
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir
index f0a410fa4015f..26c03ed309c05 100644
--- a/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad-tiling-interface.mlir
@@ -18,8 +18,7 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%fill_padded, %_ = transform.structured.pad_tiling_interface %fill_l1 to padding_sizes [8] {
- padding_values=[0.0 : f32, 0.0 : f32],
- padding_dimensions=[0]
+ padding_values=[0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
@@ -55,8 +54,7 @@ module attributes {transform.with_named_sequence} {
// Tile to 5 then pad to 8 (supposedly to better hit vector ops).
%matmul_l1, %loops_l1 = transform.structured.tile_using_for %matmul tile_sizes [5] : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%matmul_padded, %_ = transform.structured.pad_tiling_interface %matmul_l1 to padding_sizes [8] {
- padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32],
- padding_dimensions=[0]
+ padding_values=[0.0: f32, 0.0 : f32, 0.0 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
@@ -91,8 +89,7 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 14] {
- padding_dimensions = [0, 2],
+ %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 0, 14] {
padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
@@ -147,8 +144,7 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 14] {
- padding_dimensions = [0, 2],
+ %padded, %pad = transform.structured.pad_tiling_interface %0 to padding_sizes [8, 0, 14] {
padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield