[Mlir-commits] [mlir] dae8c72 - [mlir][linalg] TileToForallOp: Support memref ops
Matthias Springer
llvmlistbot at llvm.org
Wed Jun 21 00:16:26 PDT 2023
Author: Matthias Springer
Date: 2023-06-21T09:12:34+02:00
New Revision: dae8c72495ed531dadecb91f19c23dd4ccbca160
URL: https://github.com/llvm/llvm-project/commit/dae8c72495ed531dadecb91f19c23dd4ccbca160
DIFF: https://github.com/llvm/llvm-project/commit/dae8c72495ed531dadecb91f19c23dd4ccbca160.diff
LOG: [mlir][linalg] TileToForallOp: Support memref ops
Support tiling of ops with memref semantics. Previously, tileToForallOpImpl required destination-style ops to have tensor semantics and emitted the error must have "tensor semantic" for tiling otherwise. Now, tensor inits are still swapped with the corresponding scf.forall block arguments, while memref inits are left in place.
Differential Revision: https://reviews.llvm.org/D153353
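
To illustrate the effect: with memref semantics, the tiled scf.forall has no shared_outs block arguments, and each thread simply works on memref.subviews of the original buffers. A minimal sketch follows, assuming a hypothetical linalg.copy on static 64x64 shapes tiled with num_threads [4, 4] (illustrative only, not taken from the tests in this patch):

// Hypothetical input:
func.func @copy(%src: memref<64x64xf32>, %dst: memref<64x64xf32>) {
  linalg.copy ins(%src : memref<64x64xf32>) outs(%dst : memref<64x64xf32>)
  return
}

// Expected shape of the tiled IR: each thread derives its offsets and
// operates on subviews of the original memrefs; the inits are used in place.
scf.forall (%i, %j) in (4, 4) {
  %off_i = affine.apply affine_map<(d0) -> (d0 * 16)>(%i)
  %off_j = affine.apply affine_map<(d0) -> (d0 * 16)>(%j)
  %src_tile = memref.subview %src[%off_i, %off_j] [16, 16] [1, 1]
      : memref<64x64xf32> to memref<16x16xf32, strided<[64, 1], offset: ?>>
  %dst_tile = memref.subview %dst[%off_i, %off_j] [16, 16] [1, 1]
      : memref<64x64xf32> to memref<16x16xf32, strided<[64, 1], offset: ?>>
  linalg.copy ins(%src_tile : memref<16x16xf32, strided<[64, 1], offset: ?>>)
              outs(%dst_tile : memref<16x16xf32, strided<[64, 1], offset: ?>>)
} {mapping = [#gpu.thread<y>, #gpu.thread<x>]}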
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
mlir/test/Dialect/GPU/transform-gpu-failing.mlir
mlir/test/Dialect/GPU/transform-gpu.mlir
mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index 67c6d163a9512..273cd797bb39a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -380,11 +380,14 @@ static FailureOr<ForallTilingResult> tileToForallOpImpl(
auto destinationStyleOp = dyn_cast<DestinationStyleOpInterface>(clonedOp);
if (destinationStyleOp) {
for (OpOperand *outOperand : destinationStyleOp.getDpsInitOperands()) {
- auto *it = llvm::find(dest, outOperand->get());
- if (it == dest.end())
- return op->emitOpError("must have \"tensor semantic\" for tiling");
- unsigned destNum = std::distance(dest.begin(), it);
- outOperand->set(destBbArgs[destNum]);
+ // Swap tensor inits with the corresponding block argument of the
+ // scf.forall op. Memref inits remain as is.
+ if (outOperand->get().getType().isa<TensorType>()) {
+ auto *it = llvm::find(dest, outOperand->get());
+ assert(it != dest.end() && "could not find destination tensor");
+ unsigned destNum = std::distance(dest.begin(), it);
+ outOperand->set(destBbArgs[destNum]);
+ }
}
}
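
For context on the hunk above: with tensor semantics, each init is threaded through a shared_outs block argument of the scf.forall op and written back via tensor.parallel_insert_slice, which is why the cloned op's init operand must be redirected to the corresponding block argument. A memref init has no such block argument, so it is left untouched. A hedged 1-D sketch of the tensor case for comparison (hypothetical op and shapes):

// The init %init is threaded through the shared_outs bbArg %o, so the tiled
// op must read from and insert into %o rather than into %init directly.
%r = scf.forall (%i) in (4) shared_outs(%o = %init) -> (tensor<64xf32>) {
  %off = affine.apply affine_map<(d0) -> (d0 * 16)>(%i)
  %in_tile = tensor.extract_slice %src[%off] [16] [1]
      : tensor<64xf32> to tensor<16xf32>
  %out_tile = tensor.extract_slice %o[%off] [16] [1]
      : tensor<64xf32> to tensor<16xf32>
  %res = linalg.copy ins(%in_tile : tensor<16xf32>)
             outs(%out_tile : tensor<16xf32>) -> tensor<16xf32>
  scf.forall.in_parallel {
    tensor.parallel_insert_slice %res into %o[%off] [16] [1]
        : tensor<16xf32> into tensor<64xf32>
  }
}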
diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
index 0d560482d6519..b087816fdc7c9 100644
--- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
@@ -274,34 +274,3 @@ transform.sequence failures(propagate) {
// expected-error @below {{duplicated attribute, cannot map different loops to the same processor}}
transform.gpu.map_nested_forall_to_threads %funcop block_dims = [32, 32, 1] : (!transform.any_op) -> !transform.any_op
}
-
-// -----
-
-func.func @tiling_buffer_semantic_op(%x: memref<32x32xf32>, %y: memref<32x32xf32>, %stream : !gpu.async.token) {
- %one = arith.constant 1 : index
- %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
- threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one)
- {
- // expected-error @below {{'linalg.generic' op must have "tensor semantic" for tiling}}
- // expected-note @below {{when applied to this op}}
- linalg.generic
- {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%x : memref<32x32xf32>)
- outs(%y : memref<32x32xf32>) {
- ^bb0(%in: f32, %out: f32):
- linalg.yield %in : f32
- }
- gpu.terminator
- }
- return
-}
-
-transform.sequence failures(propagate) {
-^bb1(%arg0: !transform.any_op):
- %matmul = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- // expected-error @below {{transform.structured.tile_to_forall_op failed to apply}}
- %forall, %tiled = transform.structured.tile_to_forall_op %matmul num_threads [10, 20, 30] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
- : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
-}
diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir
index 64d80f95a6b34..ba37a41eddc68 100644
--- a/mlir/test/Dialect/GPU/transform-gpu.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu.mlir
@@ -307,3 +307,39 @@ transform.sequence failures(propagate) {
transform.gpu.map_nested_forall_to_threads %funcop
block_dims = [12, 11, 1] warp_dims = [3, 2, 1] : (!transform.any_op) -> !transform.any_op
}
+
+// -----
+
+// CHECK-LABEL: func.func @tiling_buffer_semantic_op(
+// CHECK: gpu.launch {{.*}} {
+// CHECK: scf.forall {{.*}} {
+// CHECK: memref.subview
+// CHECK: memref.subview
+// CHECK: linalg.generic
+// CHECK: }
+// CHECK: }
+func.func @tiling_buffer_semantic_op(%x: memref<32x32xf32>, %y: memref<32x32xf32>, %stream : !gpu.async.token) {
+ %one = arith.constant 1 : index
+ %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
+ threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one)
+ {
+ linalg.generic
+ {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel"]}
+ ins(%x : memref<32x32xf32>)
+ outs(%y : memref<32x32xf32>) {
+ ^bb0(%in: f32, %out: f32):
+ linalg.yield %in : f32
+ }
+ gpu.terminator
+ }
+ return
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg0: !transform.any_op):
+ %matmul = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ %forall, %tiled = transform.structured.tile_to_forall_op %matmul num_threads [10, 20, 30] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+}
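
As a usage note, transform-dialect test cases like the one above are driven through the test interpreter pass; a representative invocation, assuming the usual lit setup (the actual RUN lines live in the test files themselves and are not part of this diff), is:

// RUN: mlir-opt %s --test-transform-dialect-interpreter --split-input-file | FileCheck %s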
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
index 63065d5196ab8..a5f5ab2366ba9 100644
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -40,6 +40,53 @@ module {
// -----
+module {
+ // CHECK-LABEL: func @matmul_memref(
+ // CHECK: scf.forall (%{{.*}}, %{{.*}}) in (10, 20) {
+ // CHECK: memref.subview
+ // CHECK: memref.subview
+ // CHECK: memref.subview
+ // CHECK: linalg.matmul
+ // CHECK: } {mapping = [#gpu.thread<y>, #gpu.thread<x>]}
+ func.func @matmul_memref(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
+ linalg.matmul ins(%A, %B : memref<?x?xf32>, memref<?x?xf32>)
+ outs(%C : memref<?x?xf32>)
+ return
+ }
+
+ transform.sequence failures(propagate) {
+ ^bb1(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+ }
+}
+
+// -----
+
+module {
+ // CHECK-LABEL: func @copy_memref(
+ // CHECK: scf.forall (%{{.*}}, %{{.*}}) in (10, 20) {
+ // CHECK: memref.subview
+ // CHECK: memref.subview
+ // CHECK: linalg.copy
+ // CHECK: } {mapping = [#gpu.thread<y>, #gpu.thread<x>]}
+ func.func @copy_memref(%A: memref<?x?xf32>, %B: memref<?x?xf32>) {
+ linalg.copy ins(%A: memref<?x?xf32>)
+ outs(%B : memref<?x?xf32>)
+ return
+ }
+
+ transform.sequence failures(propagate) {
+ ^bb1(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["linalg.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1:2 = transform.structured.tile_to_forall_op %0 num_threads [10, 20] (mapping = [ #gpu.thread<y>, #gpu.thread<x> ] )
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+ }
+}
+
+// -----
+
// In this test case, matmul dims and tile size are dynamic.
// CHECK-DAG: #[[$map0:.+]] = affine_map<()[s0, s1] -> (s0 ceildiv s1)>