[Mlir-commits] [mlir] 9a3d3c7 - generalize pass gpu-kernel-outlining for symbol op (#72074)

Sun Nov 12 21:48:53 PST 2023

Author: drazi
Date: 2023-11-12T21:48:49-08:00
New Revision: 9a3d3c7093d3e834f2f414bafad41b921e38eec3

URL: https://github.com/llvm/llvm-project/commit/9a3d3c7093d3e834f2f414bafad41b921e38eec3
DIFF: https://github.com/llvm/llvm-project/commit/9a3d3c7093d3e834f2f414bafad41b921e38eec3.diff

LOG: generalize pass gpu-kernel-outlining for symbol op (#72074)

This PR generalizes the gpu-kernel-outlining pass to handle any op that
implements `SymbolOpInterface` instead of just `func::FuncOp`.

Before this change, the gpu-kernel-outlining pass skipped `gpu.launch` ops nested in an `llvm.func`, such as the one below:
```mlir
module {
  llvm.func @main() {
    %c1 = arith.constant 1 : index
    gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
      gpu.terminator
    }
    llvm.return
  }
}
```

After this change, the gpu-kernel-outlining pass handles `llvm.func` as well.

Added: 
    

Modified: 
    mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
    mlir/test/Dialect/GPU/outlining.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index b1e2f914db4cb9b..7432a58f18b4422 100644

--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -349,13 +349,13 @@ class GpuKernelOutliningPass
   void runOnOperation() override {
     SymbolTable symbolTable(getOperation());
     bool modified = false;
-    for (auto func : getOperation().getOps<func::FuncOp>()) {
+    for (auto func : getOperation().getOps<SymbolOpInterface>()) {
       // Insert just after the function.
       Block::iterator insertPt(func->getNextNode());
       auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
         SetVector<Value> operands;
         std::string kernelFnName =
-            Twine(op->getParentOfType<func::FuncOp>().getName(), "_kernel")
+            Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
                 .str();
 
         gpu::GPUFuncOp outlinedFunc =

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 28c121a550100c2..8020f6dfa65b745 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -37,7 +37,6 @@ func.func @launch() {
 }
 
 // CHECK-DL-LABEL: gpu.module @launch_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
-
 // CHECK-LABEL: gpu.module @launch_kernel
 // CHECK-NEXT: gpu.func @launch_kernel
 // CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref<?xf32, 1>)
@@ -63,6 +62,42 @@ func.func @launch() {
 
 // -----
 
+// This test checks that gpu-kernel-outlining can outline a gpu.launch nested in an llvm.func.
+// CHECK-LABEL: @launch_from_llvm_func
+llvm.func @launch_from_llvm_func() {
+  // CHECK: %[[ARG0:.*]] = "op"() : () -> f32
+  %0 = "op"() : () -> (f32)
+  // CHECK: %[[ARG1:.*]] = "op"() : () -> memref<?xf32, 1>
+  %1 = "op"() : () -> (memref<?xf32, 1>)
+
+  // CHECK: %[[DIM:.*]] = arith.constant 1
+  %dim = arith.constant 1 : index
+
+  // CHECK: gpu.launch_func @launch_from_llvm_func_kernel::@launch_from_llvm_func_kernel
+  // CHECK-SAME: (%[[DIM]], %[[DIM]], %[[DIM]])
+  // CHECK-SAME: (%[[DIM]], %[[DIM]], %[[DIM]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
+  // CHECK-NEXT: llvm.return
+
+  // CHECK: gpu.func {{.*}} kernel attributes
+  // CHECK-SAME: gpu.known_block_size = array<i32: 1, 1, 1>
+  // CHECK-SAME: gpu.known_grid_size = array<i32: 1, 1, 1>
+  // CHECK: gpu.return
+  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %dim, %grid_y = %dim,
+                                       %grid_z = %dim)
+             threads(%tx, %ty, %tz) in (%block_x = %dim, %block_y = %dim,
+                                        %block_z = %dim) {
+    "use"(%0): (f32) -> ()
+    "some_op"(%bx, %block_x) : (index, index) -> ()
+    %2 = memref.load %1[%tx] : memref<?xf32, 1>
+    gpu.terminator
+  }
+  llvm.return
+}
+
+// CHECK-DL-LABEL: gpu.module @launch_from_llvm_func_kernel attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>}
+
+// -----
+
 // CHECK: module attributes {gpu.container_module}
 // CHECK-LABEL: @multiple_launches
 func.func @multiple_launches() {