[Mlir-commits] [mlir] f709642 - [mlir][GPU] Add `RecursiveMemoryEffects` to `gpu.launch` (#75315)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Dec 19 22:25:29 PST 2023
Author: Matthias Springer
Date: 2023-12-20T15:25:25+09:00
New Revision: f7096428b4b0f8e9dd6ac1292eda385152de9ae5
URL: https://github.com/llvm/llvm-project/commit/f7096428b4b0f8e9dd6ac1292eda385152de9ae5
DIFF: https://github.com/llvm/llvm-project/commit/f7096428b4b0f8e9dd6ac1292eda385152de9ae5.diff
LOG: [mlir][GPU] Add `RecursiveMemoryEffects` to `gpu.launch` (#75315)
Infer the side effects of `gpu.launch` from its body.
Added:
Modified:
mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
mlir/test/Dialect/GPU/canonicalize.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 2e21cd77d2d83b..c72fde2ab351df 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -672,7 +672,8 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
def GPU_LaunchOp : GPU_Op<"launch", [
AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
- DeclareOpInterfaceMethods<InferIntRangeInterface>]>,
+ DeclareOpInterfaceMethods<InferIntRangeInterface>,
+ RecursiveMemoryEffects]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index c2abb96d7d4fb8..372dd78790276c 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -11,6 +11,8 @@ func.func @fold_wait_op_test1() {
}
// CHECK-NOT: gpu.wait
+// -----
+
// Erase duplicate barriers.
// CHECK-LABEL: func @erase_barriers
// CHECK-NEXT: gpu.barrier
@@ -21,6 +23,8 @@ func.func @erase_barriers() {
return
}
+// -----
+
// Replace uses of gpu.wait op with its async dependency.
// CHECK-LABEL: func @fold_wait_op_test2
func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
@@ -38,6 +42,8 @@ func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
// CHECK-NEXT: gpu.alloc async [%[[TOKEN1]]] ()
// CHECK-NEXT: return
+// -----
+
// CHECK-LABEL: func @fold_memcpy_op
func.func @fold_memcpy_op(%arg0: i1) {
%cst = arith.constant 0.000000e+00 : f16
@@ -60,6 +66,8 @@ func.func @fold_memcpy_op(%arg0: i1) {
}
// CHECK-NOT: gpu.memcpy
+// -----
+
// We cannot fold memcpy here as dest is a block argument.
// CHECK-LABEL: func @do_not_fold_memcpy_op1
func.func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
@@ -75,6 +83,8 @@ func.func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
}
// CHECK: gpu.memcpy
+// -----
+
// We cannot fold gpu.memcpy as it is used by an op having read effect on dest.
// CHECK-LABEL: func @do_not_fold_memcpy_op2
func.func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
@@ -92,6 +102,8 @@ func.func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
}
// CHECK: gpu.memcpy
+// -----
+
// We cannot fold gpu.memcpy, as the defining op of dest is not an alloc-like op.
// CHECK-LABEL: func @do_not_fold_memcpy_op3
func.func @do_not_fold_memcpy_op3(%arg0: memref<1xi8>, %arg1: memref<i1>) {
@@ -102,6 +114,8 @@ func.func @do_not_fold_memcpy_op3(%arg0: memref<1xi8>, %arg1: memref<i1>) {
}
// CHECK: gpu.memcpy
+// -----
+
// CHECK-LABEL: @memcpy_after_cast
func.func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
// CHECK-NOT: memref.cast
@@ -112,6 +126,8 @@ func.func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
return
}
+// -----
+
// CHECK-LABEL: @memset_after_cast
func.func @memset_after_cast(%arg0: memref<10xf32>, %arg1: f32) {
// CHECK-NOT: memref.cast
@@ -227,3 +243,20 @@ func.func @make_subgroup_reduce_uniform() {
}
return
}
+
+// -----
+
+// The GPU kernel does not have any side effecting ops, so the entire
+// gpu.launch op can fold away.
+
+// CHECK-LABEL: func @gpu_launch_without_side_effects
+// CHECK-NOT: gpu.launch
+func.func @gpu_launch_without_side_effects() {
+ %0:6 = "test.test1"() : () -> (index, index, index, index, index, index)
+ gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %0#0, %arg7 = %0#1, %arg8 = %0#2)
+ threads(%arg3, %arg4, %arg5) in (%arg9 = %0#3, %arg10 = %0#4, %arg11 = %0#5) {
+ %1 = arith.addi %arg0, %arg1 : index
+ gpu.terminator
+ }
+ return
+}
More information about the Mlir-commits
mailing list