[Mlir-commits] [mlir] d6a2014 - [mlir][Linalg]: Add memory space to linalg transform::PromoteOp
Aviad Cohen
llvmlistbot at llvm.org
Thu Sep 7 07:35:40 PDT 2023
Author: Aviad Cohen
Date: 2023-09-07T17:35:32+03:00
New Revision: d6a2014eb8b9f2d728e967b18f0bbdfb91629efe
URL: https://github.com/llvm/llvm-project/commit/d6a2014eb8b9f2d728e967b18f0bbdfb91629efe
DIFF: https://github.com/llvm/llvm-project/commit/d6a2014eb8b9f2d728e967b18f0bbdfb91629efe.diff
LOG: [mlir][Linalg]: Add memory space to linalg transform::PromoteOp
This patch allows supplying an optional memory space for the promoted
buffer.
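For example, the memory space is supplied as an attribute on the transform
op; this usage is taken verbatim from the test added to promote.mlir below:

  %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
  %1 = transform.structured.promote %0 { memory_space = #gpu.address_space<workgroup> } : (!transform.any_op) -> !transform.any_op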
Differential Revision: https://reviews.llvm.org/D159074
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
mlir/test/Dialect/Linalg/promote.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index ee6e12f72b80bab..6011663e432c1e1 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -165,9 +165,9 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
//===----------------------------------------------------------------------===//
def DecomposeOp : Op<Transform_Dialect, "structured.decompose",
- [FunctionalStyleTransformOpTrait,
+ [FunctionalStyleTransformOpTrait,
MemoryEffectsOpInterface,
- TransformOpInterface,
+ TransformOpInterface,
TransformEachOpTrait,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
@@ -414,8 +414,8 @@ def InterchangeOp : Op<Transform_Dialect, "structured.interchange",
[DenseArrayNonNegative<DenseI64ArrayAttr>]>:$iterator_interchange);
let results = (outs TransformHandleTypeInterface:$transformed);
- let assemblyFormat = [{
- $target
+ let assemblyFormat = [{
+ $target
(`iterator_interchange` `=` $iterator_interchange^)? attr-dict
`:` custom<SemiFunctionType>(type($target), type($transformed))
}];
@@ -479,7 +479,7 @@ def LowerUnPackOp : Op<Transform_Dialect, "structured.lower_unpack", [
TransformOpInterface,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape +
+ Lower a tensor.unpack into empty + linalg.transpose + tensor.collapse_shape +
tensor.extract_slice.
#### Return modes
@@ -497,7 +497,7 @@ def LowerUnPackOp : Op<Transform_Dialect, "structured.lower_unpack", [
Transform_ConcreteOpType<"linalg.transpose">:$transpose_op,
Transform_ConcreteOpType<"tensor.collapse_shape">:$collapse_shape_op,
Transform_ConcreteOpType<"tensor.extract_slice">:$extract_slice_op);
- let assemblyFormat = [{
+ let assemblyFormat = [{
$target attr-dict `:` functional-type(operands, results)
}];
@@ -665,7 +665,7 @@ def PackOp : Op<Transform_Dialect, "structured.pack", [
let description = [{
Pack a LinalgOp by applying a data tiling transformation on the op and
packing the operands according to the `packed_sizes` specification.
-
+
Iterator dimensions are tiled in their canonical order in the op spec.
Operands are packed according to the same canonical order of the op iterator
dimensions.
@@ -700,7 +700,7 @@ def PackOp : Op<Transform_Dialect, "structured.pack", [
// affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d4, d5)>
// M N m n
// affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
- %0 = linalg.generic_representing_some_higher_d_matmul
+ %0 = linalg.generic_representing_some_higher_d_matmul
ins(%A, %B: tensor<?x?x2x4xf32>, tensor<?x?x4x3xf32>)
outs( %C: tensor<?x?x2x3xf32>)
```
@@ -727,7 +727,7 @@ def PackOp : Op<Transform_Dialect, "structured.pack", [
DefaultValuedAttr<DenseI64ArrayAttr, "{}">:$static_packed_sizes);
let results = (outs TransformHandleTypeInterface:$packed_op);
let assemblyFormat = [{
- $target
+ $target
`packed_sizes` `=` custom<DynamicIndexList>($packed_sizes,
$static_packed_sizes,
type($packed_sizes))
@@ -756,27 +756,27 @@ def PackGreedilyOp : Op<Transform_Dialect, "structured.pack_greedily", [
Target a Linalg op and rewrite it into packed LinalgOp form by trying to
infer whether a known suboperation is embedded
- Different packing strategies are applied in order, when one applies
+ Different packing strategies are applied in order, when one applies
successfully, the transform returns:
1. Matmul packing: Try to infer a matmul operation embedded in the target op.
Specifically, this looks for 2 parallel dimensions that participate in
an outer-product and 1 reduction dimension.
These dimensions are referred as (m, n, k) to match canonical matmul
terminology.
-
+
The packed sizes for (m, n, k) are specified by `matmul_packed_sizes`
and the optional `matmul_padded_sizes_next_multiple_of`.
- When an entry `matmul_packed_sizes[i]` is non-0, the corresponding
+ When an entry `matmul_packed_sizes[i]` is non-0, the corresponding
dimension is packed by `matmul_packed_sizes[i]`.
Otherwise, the dimension is merely padded to the next multiple of
`matmul_padded_sizes_next_multiple_of[i]`.
`matmul_padded_sizes_next_multiple_of` is optional and is expected to
either be empty or of size `3`, matching the size of `matmul_packed_sizes`.
- For each individual element of `matmul_packed_sizes` and
+ For each individual element of `matmul_packed_sizes` and
`matmul_padded_sizes_next_multiple_of`, only one of them is allowed to
be non-zero.
-
+
The ordering of the packed dimensions (mm, nn, kk) is specified by the
`matmul_inner_dims_order` attribute.
@@ -787,7 +787,7 @@ def PackGreedilyOp : Op<Transform_Dialect, "structured.pack_greedily", [
the most minor indexing dimensions of the linalg.generic. The most minor
dimensions are themselves ordered according to `inner_dims_order`.
4. An elementwise traversal of `matmul_packed_sizes` and
- `matmul_padded_sizes_next_multiple_of` is performed and for each
+ `matmul_padded_sizes_next_multiple_of` is performed and for each
dimension `d`, either pack to `matmul_packed_sizes[d]` or pad to the
`matmul_padded_sizes_next_multiple_of[d]`.
5. Packing/padding is performed by the amounts determined in step 4. and
@@ -815,7 +815,7 @@ def PackGreedilyOp : Op<Transform_Dialect, "structured.pack_greedily", [
[DenseArrayCount<3>]>:$static_matmul_packed_sizes,
ConfinedAttr<DefaultValuedAttr<DenseI64ArrayAttr, "{}">,
[Attr<
- Or<[DenseArrayCount<0>.predicate,
+ Or<[DenseArrayCount<0>.predicate,
DenseArrayCount<3>.predicate]>,
"with 0 or 3 elements"
>]>
@@ -837,7 +837,7 @@ def PackGreedilyOp : Op<Transform_Dialect, "structured.pack_greedily", [
`matmul_packed_sizes` `=` custom<DynamicIndexList>($matmul_packed_sizes,
$static_matmul_packed_sizes,
type($matmul_packed_sizes))
- (`matmul_padded_sizes_next_multiple_of` `=`
+ (`matmul_padded_sizes_next_multiple_of` `=`
$matmul_padded_sizes_next_multiple_of^)?
`matmul_inner_dims_order` `=` $matmul_inner_dims_order
)
@@ -862,7 +862,7 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
DeclareOpInterfaceMethods<TransformOpInterface>,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and
+ Apply a transposition to a single `tensor.pack` (resp. `tensor.unpack`) and
update the `linalg.generic` op that consumes (resp. produces) the operation.
This transform allows composing a simple `structured.pack` with additional
@@ -874,7 +874,7 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
the specified `tensor.pack` or `tensor.unpack` op.
If the `target` of this op is a `tensor.pack` then a new `tensor.empty` will
- be created along with transposed versions of the `tensor.pack` and the
+ be created along with transposed versions of the `tensor.pack` and the
consuming `linalg.generic`, which is expected to be the sole consumer.
If the `target` of this op is a `tensor.unpack` then the whole pack / compute
@@ -894,7 +894,7 @@ def PackTransposeOp : Op<Transform_Dialect, "structured.pack_transpose", [
This operation returns 3 handles, one to the transformed LinalgOp, one to
the transformed `tensor.pack` and one to the transformed `tensor.unpack`.
- The last handle for `tensor.unpack` is empty if `target_pack_or_unpack_op`
+ The last handle for `tensor.unpack` is empty if `target_pack_or_unpack_op`
was not itself a `tensor.unpack`.
}];
@@ -971,7 +971,7 @@ def PadOp : Op<Transform_Dialect, "structured.pad",
let builders = [
// Builder for a transform::PadOp with automatic inference of padding
// value. Warning: this will set the value 0 for the inferred elemental
- // type without taking the op into account and thus only work for the
+ // type without taking the op into account and thus only work for the
// add/mul ring at the moment.
// TODO: support other operations (e.g. min, max etc).
OpBuilder<(ins "Value":$target,
@@ -1048,7 +1048,7 @@ def HoistPadOp : Op<Transform_Dialect, "structured.hoist_pad",
Hoist the tensor.pad target operation by at most the given number of loops.
Optionally apply the transpose attribute to the inner dimensions.
- TODO: In the future, we should consider rewriting as a tensor.pack after
+ TODO: In the future, we should consider rewriting as a tensor.pack after
hoisting since this abstraction is now available.
TODO: Maybe also return the linalg.generic transpose created at some point.
@@ -1060,7 +1060,7 @@ def HoistPadOp : Op<Transform_Dialect, "structured.hoist_pad",
If all the operations referred to by the `target` handle padproperly, the
transform succeeds. Otherwise the transform silently fails.
- The return handle points to only the subset of successfully hoisted
+ The return handle points to only the subset of successfully hoisted
tensor.pad operations, which can be empty.
}];
@@ -1073,9 +1073,9 @@ def HoistPadOp : Op<Transform_Dialect, "structured.hoist_pad",
let results = (outs TransformHandleTypeInterface:$transformed);
let assemblyFormat = [{
- $target
- `by` $num_loops `loops`
- (`,` `transpose` `by` $transpose^)?
+ $target
+ `by` $num_loops `loops`
+ (`,` `transpose` `by` $transpose^)?
attr-dict
`:` functional-type(operands, results)
}];
@@ -1122,6 +1122,7 @@ def PromoteOp : Op<Transform_Dialect, "structured.promote",
DefaultValuedAttr<BoolArrayAttr, "{}">:$use_full_tile_buffers,
UnitAttr:$use_full_tiles_by_default,
UnitAttr:$use_alloca,
+ OptionalAttr<AnyAttr>:$memory_space,
OptionalAttr<DeviceMappingArrayAttr>:$mapping,
OptionalAttr<I64Attr>:$alignment);
let results = (outs TransformHandleTypeInterface:$transformed);
@@ -1202,7 +1203,7 @@ def ScalarizeOp : Op<Transform_Dialect, "structured.scalarize",
let arguments = (ins TransformHandleTypeInterface:$target);
let results = (outs TransformHandleTypeInterface:$result);
- let assemblyFormat =
+ let assemblyFormat =
"$target attr-dict `:`"
"custom<SemiFunctionType>(type($target), type($result))";
@@ -1248,9 +1249,9 @@ def DecomposeInterfaceOp : Op<Transform_Dialect, "structured.decompose_interface
def RewriteInDestinationPassingStyleOp : Op<
Transform_Dialect, "structured.rewrite_in_destination_passing_style",
- [FunctionalStyleTransformOpTrait,
+ [FunctionalStyleTransformOpTrait,
MemoryEffectsOpInterface,
- TransformOpInterface,
+ TransformOpInterface,
TransformEachOpTrait,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
@@ -1260,7 +1261,7 @@ def RewriteInDestinationPassingStyleOp : Op<
- tensor.pad
- tensor.generate
- tensor.from_elements
- This dichotomy hints at a future interface, for now the implementation just
+ This dichotomy hints at a future interface, for now the implementation just
switches between
different implementation.
#### Return modes
@@ -1271,7 +1272,7 @@ def RewriteInDestinationPassingStyleOp : Op<
The return handle points to a subset of successfully produced operations:
- `tensor.pad` case, the returned handle points to the tensor.insert_slice.
- `tensor.generate` case, the returned handle points to the linalg.generic.
- - `tensor.from_elements` case, the returned handle points to the last
+ - `tensor.from_elements` case, the returned handle points to the last
`tensor.insert`.
}];
@@ -1483,7 +1484,7 @@ def SplitReductionOp : Op<Transform_Dialect, "structured.split_reduction",
TransformHandleTypeInterface:$split_linalg_op,
TransformHandleTypeInterface:$combining_linalg_op);
- let assemblyFormat =
+ let assemblyFormat =
"$target attr-dict `:`"
"functional-type(operands, results)";
@@ -1990,7 +1991,7 @@ def TileToScfForOp : Op<Transform_Dialect, "structured.tile_to_scf_for",
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$interchange);
let results = (outs TransformHandleTypeInterface:$tiled_linalg_op,
Variadic<TransformHandleTypeInterface>:$loops);
-
+
let builders = [
OpBuilder<(ins "Value":$target,
"ArrayRef<OpFoldResult>":$mixedTileSizes,
@@ -2057,7 +2058,7 @@ def VectorizeOp : Op<Transform_Dialect, "structured.vectorize",
UnitAttr:$disable_transfer_permutation_map_lowering_patterns);
let results = (outs TransformHandleTypeInterface:$transformed);
- let assemblyFormat =
+ let assemblyFormat =
"$target attr-dict `:`"
"functional-type(operands, results)";
@@ -2279,16 +2280,16 @@ def HoistRedundantTensorSubsetsOp :
TransformOpInterface,
ReportTrackingListenerFailuresOpTrait]> {
let description = [{
- Hoists supported tensor subset extract/insert operation pairs out of
+ Hoists supported tensor subset extract/insert operation pairs out of
immediately enclosing loop iteratively, if the following conditions
are true:
1. The 2 ops access the same tensor subset.
2. All operands are invariant under the enclosing loop.
-
+
The supported subset extract/insert operation pairs currently comprise:
- tensor.extract_slice / tensor.insert_slice
- vector.transfer_read / vector.transfer_write on tensors
-
+
Only scf.for loops are currently supported.
When applied to:
@@ -2304,8 +2305,8 @@ def HoistRedundantTensorSubsetsOp :
let results = (outs);
let assemblyFormat = [{
- $target
- attr-dict
+ $target
+ attr-dict
`:` functional-type(operands, results)
}];
@@ -2328,7 +2329,7 @@ def InsertSliceToCopyOp :
TransformEachOpTrait, TransformOpInterface]> {
let description = [{
Targeted rewrite of an tensor.insert_slice to linalg.copy.
- This is useful to materialize copies explicitly before bufferization and
+ This is useful to materialize copies explicitly before bufferization and
transform them, avoiding the need to rediscover them after bufferization.
If the insert_slice source is already a linalg.copy, only return the source
@@ -2336,7 +2337,7 @@ def InsertSliceToCopyOp :
#### Return modes:
- The operation always succeeds and returns a handle to the relevant
+ The operation always succeeds and returns a handle to the relevant
linalg.copy op.
}];
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index fd82c67ede5fa97..94a39ad186f54a3 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -362,6 +362,13 @@ struct LinalgPromotionOptions {
alignment = align;
return *this;
}
+ /// Memory space of promoted buffer. If `std::nullopt` do not specify memory
+ /// space.
+ std::optional<Attribute> memorySpace;
+ LinalgPromotionOptions &setMemorySpace(Attribute memorySpc) {
+ memorySpace = memorySpc;
+ return *this;
+ }
/// Use alloca with the default allocation scheme.
bool useAlloca = false;
LinalgPromotionOptions &setUseAlloca(bool use) {
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 3421a3c169dbba1..7a701e44a9cda4a 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -1883,6 +1883,8 @@ transform::PromoteOp::applyToOne(transform::TransformRewriter &rewriter,
llvm::to_vector(getUseFullTileBuffers().getAsValueRange<BoolAttr>()));
if (getAlignment().has_value())
promotionOptions = promotionOptions.setAlignment(*getAlignment());
+ if (getMemorySpace().has_value())
+ promotionOptions = promotionOptions.setMemorySpace(*getMemorySpace());
if (getMapping().has_value()) {
// The mapping should only contain an element
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
index 8cf85ebd1cbb83e..ad399f57f72cb1b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp
@@ -54,10 +54,16 @@ static Value allocBuffer(ImplicitLocOpBuilder &b,
if (alignment.has_value())
alignmentAttr = b.getI64IntegerAttr(alignment.value());
+ Attribute memorySpaceAttr;
+ if (options.memorySpace.has_value())
+ memorySpaceAttr = *options.memorySpace;
+
// Static buffer.
if (std::optional<int64_t> cst = getConstantIntValue(allocSize)) {
auto staticBufferType =
MemRefType::get(width * cst.value(), b.getIntegerType(8));
+ staticBufferType =
+ MemRefType::Builder(staticBufferType).setMemorySpace(memorySpaceAttr);
if (options.useAlloca) {
return b.create<memref::AllocaOp>(staticBufferType, ValueRange{},
alignmentAttr);
@@ -69,6 +75,8 @@ static Value allocBuffer(ImplicitLocOpBuilder &b,
// Fallback dynamic buffer.
auto dynamicBufferType =
MemRefType::get(ShapedType::kDynamic, b.getIntegerType(8));
+ dynamicBufferType =
+ MemRefType::Builder(dynamicBufferType).setMemorySpace(memorySpaceAttr);
Value mul = b.createOrFold<arith::MulIOp>(
b.create<arith::ConstantIndexOp>(width), allocSize);
if (options.useAlloca)
@@ -89,6 +97,10 @@ static std::optional<Value> defaultAllocBufferCallBack(
auto zero = b.create<arith::ConstantIndexOp>(0);
auto one = b.create<arith::ConstantIndexOp>(1);
+ Attribute memorySpaceAttr;
+ if (options.memorySpace.has_value())
+ memorySpaceAttr = *options.memorySpace;
+
Value allocSize = one;
for (const auto &size : llvm::enumerate(boundingSubViewSize))
allocSize = b.createOrFold<arith::MulIOp>(allocSize, size.value());
@@ -96,9 +108,12 @@ static std::optional<Value> defaultAllocBufferCallBack(
layout, alignment);
SmallVector<int64_t, 4> dynSizes(boundingSubViewSize.size(),
ShapedType::kDynamic);
- Value view = b.createOrFold<memref::ViewOp>(
- MemRefType::get(dynSizes, viewType.getElementType()), buffer, zero,
- boundingSubViewSize);
+
+ auto viewMemRefType = MemRefType::get(dynSizes, viewType.getElementType());
+ viewMemRefType =
+ MemRefType::Builder(viewMemRefType).setMemorySpace(memorySpaceAttr);
+ Value view = b.createOrFold<memref::ViewOp>(viewMemRefType, buffer, zero,
+ boundingSubViewSize);
return view;
}
diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir
index 4b902acd41f9219..9ca6db5e41931ec 100644
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -275,3 +275,111 @@ transform.sequence failures(propagate) {
%0 = transform.structured.match interface{LinalgOp} in %arg1 : (!transform.any_op) -> !transform.any_op
%1 = transform.structured.promote %0 : (!transform.any_op) -> !transform.any_op
}
+
+// -----
+
+#map = affine_map<(d0, d1) -> (d0, d1)>
+
+ // CHECK-LABEL: func.func @linalg_generic_update_all_function_inputs_outputs(
+ // CHECK-SAME: %[[VAL_0:.*]]: memref<3x4xf32, 1>,
+ // CHECK-SAME: %[[VAL_1:.*]]: memref<3x4xf32, 1>) -> memref<3x4xf32, 1> {
+func.func @linalg_generic_update_all_function_inputs_outputs(%arg0: memref<3x4xf32, 1>, %arg1: memref<3x4xf32, 1>) -> memref<3x4xf32, 1> {
+ // CHECK: %[[VAL_2:.*]] = memref.alloc() {alignment = 64 : i64} : memref<3x4xf32, 1>
+ // CHECK: %[[VAL_3:.*]] = memref.subview %[[VAL_0]][0, 0] [4, 3] [1, 1] : memref<3x4xf32, 1> to memref<4x3xf32, strided<[4, 1]>, 1>
+ // CHECK: %[[VAL_4:.*]] = memref.subview %[[VAL_1]][0, 0] [4, 3] [1, 1] : memref<3x4xf32, 1> to memref<4x3xf32, strided<[4, 1]>, 1>
+ // CHECK: %[[VAL_5:.*]] = memref.subview %[[VAL_2]][0, 0] [4, 3] [1, 1] : memref<3x4xf32, 1> to memref<4x3xf32, strided<[4, 1]>, 1>
+
+ %alloc = memref.alloc() {alignment = 64 : i64} : memref<3x4xf32, 1>
+ %subview = memref.subview %arg0[0, 0] [4, 3] [1, 1] : memref<3x4xf32, 1> to memref<4x3xf32, strided<[4, 1]>, 1>
+ %subview_0 = memref.subview %arg1[0, 0] [4, 3] [1, 1] : memref<3x4xf32, 1> to memref<4x3xf32, strided<[4, 1]>, 1>
+ %subview_1 = memref.subview %alloc[0, 0] [4, 3] [1, 1] : memref<3x4xf32, 1> to memref<4x3xf32, strided<[4, 1]>, 1>
+
+ // CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_7:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_10:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_11:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_12:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_13:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_14:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_15:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_17:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_18:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_19:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_20:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_21:.*]] = arith.constant 12 : index
+ // CHECK: %[[VAL_22:.*]] = memref.alloc() : memref<48xi8, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_23:.*]] = memref.view %[[VAL_22]]{{\[}}%[[VAL_18]]]{{\[}}%[[VAL_12]], %[[VAL_15]]] : memref<48xi8, #gpu.address_space<workgroup>> to memref<?x?xf32, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_24:.*]] = memref.subview %[[VAL_23]][0, 0] {{\[}}%[[VAL_14]], %[[VAL_17]]] [1, 1] : memref<?x?xf32, #gpu.address_space<workgroup>> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_25:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_26:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_27:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_28:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_29:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_30:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_31:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_32:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_33:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_34:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_35:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_36:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_37:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_38:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_39:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_40:.*]] = arith.constant 12 : index
+ // CHECK: %[[VAL_41:.*]] = memref.alloc() : memref<48xi8, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_42:.*]] = memref.view %[[VAL_41]]{{\[}}%[[VAL_37]]]{{\[}}%[[VAL_31]], %[[VAL_34]]] : memref<48xi8, #gpu.address_space<workgroup>> to memref<?x?xf32, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_43:.*]] = memref.subview %[[VAL_42]][0, 0] {{\[}}%[[VAL_33]], %[[VAL_36]]] [1, 1] : memref<?x?xf32, #gpu.address_space<workgroup>> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_44:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_45:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_46:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_47:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_48:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_49:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_50:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_51:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_52:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_53:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_54:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_55:.*]] = arith.constant 3 : index
+ // CHECK: %[[VAL_56:.*]] = arith.constant 0 : index
+ // CHECK: %[[VAL_57:.*]] = arith.constant 1 : index
+ // CHECK: %[[VAL_58:.*]] = arith.constant 4 : index
+ // CHECK: %[[VAL_59:.*]] = arith.constant 12 : index
+ // CHECK: %[[VAL_60:.*]] = memref.alloc() : memref<48xi8, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_61:.*]] = memref.view %[[VAL_60]]{{\[}}%[[VAL_56]]]{{\[}}%[[VAL_50]], %[[VAL_53]]] : memref<48xi8, #gpu.address_space<workgroup>> to memref<?x?xf32, #gpu.address_space<workgroup>>
+ // CHECK: %[[VAL_62:.*]] = memref.subview %[[VAL_61]][0, 0] {{\[}}%[[VAL_52]], %[[VAL_55]]] [1, 1] : memref<?x?xf32, #gpu.address_space<workgroup>> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
+ // CHECK: memref.copy %[[VAL_3]], %[[VAL_24]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
+ // CHECK: memref.copy %[[VAL_4]], %[[VAL_43]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
+ // CHECK: memref.copy %[[VAL_5]], %[[VAL_62]] : memref<4x3xf32, strided<[4, 1]>, 1> to memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>
+ // CHECK: linalg.generic {doc = "", indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"], library_call = ""} ins(%[[VAL_24]], %[[VAL_43]] : memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>, memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>) outs(%[[VAL_62]] : memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>>) {
+ // CHECK: ^bb0(%[[VAL_63:.*]]: f32, %[[VAL_64:.*]]: f32, %[[VAL_65:.*]]: f32):
+ // CHECK: %[[VAL_66:.*]] = arith.addf %[[VAL_63]], %[[VAL_64]] : f32
+ // CHECK: linalg.yield %[[VAL_66]] : f32
+ // CHECK: }
+
+
+ linalg.generic {doc = "", indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"], library_call = ""} ins(%subview, %subview_0 : memref<4x3xf32, strided<[4, 1]>, 1>, memref<4x3xf32, strided<[4, 1]>, 1>) outs(%subview_1 : memref<4x3xf32, strided<[4, 1]>, 1>) {
+ ^bb0(%in: f32, %in_1: f32, %out: f32):
+ %1 = arith.addf %in, %in_1 : f32
+ linalg.yield %1 : f32
+ }
+
+ // CHECK: memref.copy %[[VAL_62]], %[[VAL_5]] : memref<?x?xf32, strided<[?, 1], offset: ?>, #gpu.address_space<workgroup>> to memref<4x3xf32, strided<[4, 1]>, 1>
+ // CHECK: memref.dealloc %[[VAL_22]] : memref<48xi8, #gpu.address_space<workgroup>>
+ // CHECK: memref.dealloc %[[VAL_41]] : memref<48xi8, #gpu.address_space<workgroup>>
+ // CHECK: memref.dealloc %[[VAL_60]] : memref<48xi8, #gpu.address_space<workgroup>>
+ // CHECK: return %[[VAL_2]] : memref<3x4xf32, 1>
+ // CHECK: }
+
+ return %alloc : memref<3x4xf32, 1>
+}
+
+
+transform.sequence failures(propagate) {
+^bb0(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.structured.promote %0 { memory_space = #gpu.address_space<workgroup> } : (!transform.any_op) -> !transform.any_op
+}
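For C++ users of the Linalg promotion helpers, the new
LinalgPromotionOptions::setMemorySpace hook added in Transforms.h above can
be configured as in the following sketch; the helper function and the choice
of the GPU workgroup address space are illustrative assumptions, not part of
this patch:

  // Sketch: build promotion options that place promoted buffers in GPU
  // workgroup memory. Any Attribute accepted as a memref memory space works;
  // when memorySpace is left unset, buffers keep the default memory space.
  #include "mlir/Dialect/GPU/IR/GPUDialect.h"
  #include "mlir/Dialect/Linalg/Transforms/Transforms.h"

  static mlir::linalg::LinalgPromotionOptions
  makeWorkgroupPromotionOptions(mlir::MLIRContext *ctx) {
    auto workgroupSpace = mlir::gpu::AddressSpaceAttr::get(
        ctx, mlir::gpu::AddressSpace::Workgroup);
    mlir::linalg::LinalgPromotionOptions options;
    // Align promoted allocations and tag them with the workgroup space.
    options.setAlignment(16).setMemorySpace(workgroupSpace);
    return options;
  }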