[Mlir-commits] [mlir] 1c1803d - [mlir][gpu] Add custom printer/parser for gpu.launch_func.

Wed Oct 21 09:19:13 PDT 2020

Author: Christian Sigg
Date: 2020-10-21T18:19:00+02:00
New Revision: 1c1803dbb0f6a4a9bef557d98b4f3a498eb1bf75

URL: https://github.com/llvm/llvm-project/commit/1c1803dbb0f6a4a9bef557d98b4f3a498eb1bf75
DIFF: https://github.com/llvm/llvm-project/commit/1c1803dbb0f6a4a9bef557d98b4f3a498eb1bf75.diff

LOG: [mlir][gpu] Add custom printer/parser for gpu.launch_func.

Reviewed By: herhut

Differential Revision: https://reviews.llvm.org/D89262

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/GPUOps.td
    mlir/include/mlir/IR/FunctionImplementation.h
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/lib/IR/FunctionImplementation.cpp
    mlir/test/Dialect/GPU/invalid.mlir
    mlir/test/Dialect/GPU/ops.mlir
    mlir/test/Dialect/GPU/outlining.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 1542241b5b02..180e9cf67d22 100644

--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -18,10 +18,6 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/IR/SymbolInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
-// Type constraint accepting standard integers, indices.
-def IntOrIndex : TypeConstraint<
-  Or<[AnySignlessInteger.predicate, Index.predicate]>, "integer or index">;
-
 //===----------------------------------------------------------------------===//
 // GPU Dialect operations.
 //===----------------------------------------------------------------------===//
@@ -296,9 +292,9 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">,
 }
 
 def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
-    Arguments<(ins IntOrIndex:$gridSizeX, IntOrIndex:$gridSizeY,
-               IntOrIndex:$gridSizeZ, IntOrIndex:$blockSizeX,
-               IntOrIndex:$blockSizeY, IntOrIndex:$blockSizeZ,
+    Arguments<(ins SymbolRefAttr:$kernel,
+               Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
+               Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
                Variadic<AnyType>:$operands)>,
     Results<(outs)> {
   let summary = "Launches a function as a GPU kernel";
@@ -312,8 +308,8 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
     function is required to be a gpu.module. And finally, the module containing
     the kernel module (which thus cannot be the top-level module) is required
     to have the `gpu.container_module` attribute. The `gpu.launch_func`
-    operation has a symbol attribute named `kernel` to identify the fully specified
-    kernel function to launch (both the gpu.module and func).
+    operation has a symbol attribute named `kernel` to identify the fully 
+    specified kernel function to launch (both the gpu.module and func).
 
     The operation takes at least six operands, with the first three operands
     being grid sizes along x,y,z dimensions and the following three being block
@@ -321,8 +317,6 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
     unused sizes must be explicitly set to `1`. The remaining operands are
     passed as arguments to the kernel function.
 
-    A custom syntax for this operation is currently not available.
-
     Example:
 
     ```mlir
@@ -357,13 +351,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
         }
       }
 
-      "gpu.launch_func"(%cst, %cst, %cst,  // Grid sizes.
-                        %cst, %cst, %cst,  // Block sizes.
-                        %arg0, %arg1)      // Arguments passed to the kernel.
-            { kernel_module = @kernels,    // Module containing the kernel.
-              kernel = "kernel_1" }        // Kernel function.
-            : (index, index, index, index, index, index, f32, memref<?xf32, 1>)
-              -> ()
+      gpu.launch_func
+          @kernels::@kernel_1                          // Kernel function.
+          blocks in (%cst, %cst, %cst)                 // Grid size.
+          threads in (%cst, %cst, %cst)                // Block size.
+          args(%arg0 : f32, %arg1 : memref<?xf32, 1>)  // Kernel arguments.
     }
     ```
   }];
@@ -371,19 +363,12 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
   let skipDefaultBuilders = 1;
 
   let builders = [
-    OpBuilder<"GPUFuncOp kernelFunc, "
-              "Value gridSizeX, Value gridSizeY, Value gridSizeZ, "
-              "Value blockSizeX, Value blockSizeY, Value blockSizeZ, "
-              "ValueRange kernelOperands">,
     OpBuilder<"GPUFuncOp kernelFunc, "
               "KernelDim3 gridSize, KernelDim3 blockSize, "
               "ValueRange kernelOperands">
   ];
 
   let extraClassDeclaration = [{
-    /// The kernel function specified by the operation's `kernel` attribute.
-    SymbolRefAttr kernel();
-
     /// The number of operands passed to the kernel function.
     unsigned getNumKernelOperands();
 
@@ -416,6 +401,13 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
   }];
 
   let verifier = [{ return ::verify(*this); }];
+  let assemblyFormat = [{
+      $kernel
+      `blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
+      `threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
+      custom<LaunchFuncOperands>($operands, type($operands))
+      attr-dict
+  }];
 }
 
 def GPU_LaunchOp : GPU_Op<"launch">,

diff  --git a/mlir/include/mlir/IR/FunctionImplementation.h b/mlir/include/mlir/IR/FunctionImplementation.h
index 958cba51f6dc..c19100c55219 100644
--- a/mlir/include/mlir/IR/FunctionImplementation.h
+++ b/mlir/include/mlir/IR/FunctionImplementation.h
@@ -49,6 +49,16 @@ void addArgAndResultAttrs(Builder &builder, OperationState &result,
 using FuncTypeBuilder = function_ref<Type(
     Builder &, ArrayRef<Type>, ArrayRef<Type>, VariadicFlag, std::string &)>;
 
+/// Parses function arguments using `parser`. The `allowAttributes` and
+/// `allowVariadic` arguments indicate whether argument attributes and variadic
+/// arguments are supported. The trailing arguments are populated by this
+/// function with names, types and attributes of the arguments.
+ParseResult parseFunctionArgumentList(
+    OpAsmParser &parser, bool allowAttributes, bool allowVariadic,
+    SmallVectorImpl<OpAsmParser::OperandType> &argNames,
+    SmallVectorImpl<Type> &argTypes, SmallVectorImpl<NamedAttrList> &argAttrs,
+    bool &isVariadic);
+
 /// Parses a function signature using `parser`. The `allowVariadic` argument
 /// indicates whether functions with variadic arguments are supported. The
 /// trailing arguments are populated by this function with names, types and

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 79fe969dbe17..7abefd7a5499 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -428,12 +428,11 @@ static ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) {
 //===----------------------------------------------------------------------===//
 
 void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
-                         GPUFuncOp kernelFunc, Value gridSizeX, Value gridSizeY,
-                         Value gridSizeZ, Value blockSizeX, Value blockSizeY,
-                         Value blockSizeZ, ValueRange kernelOperands) {
+                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
+                         KernelDim3 blockSize, ValueRange kernelOperands) {
   // Add grid and block sizes as op operands, followed by the data operands.
-  result.addOperands(
-      {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
+  result.addOperands({gridSize.x, gridSize.y, gridSize.z, blockSize.x,
+                      blockSize.y, blockSize.z});
   result.addOperands(kernelOperands);
   auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();
   auto kernelSymbol = builder.getSymbolRefAttr(
@@ -441,17 +440,6 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
   result.addAttribute(getKernelAttrName(), kernelSymbol);
 }
 
-void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
-                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
-                         KernelDim3 blockSize, ValueRange kernelOperands) {
-  build(builder, result, kernelFunc, gridSize.x, gridSize.y, gridSize.z,
-        blockSize.x, blockSize.y, blockSize.z, kernelOperands);
-}
-
-SymbolRefAttr LaunchFuncOp::kernel() {
-  return getAttrOfType<SymbolRefAttr>(getKernelAttrName());
-}
-
 unsigned LaunchFuncOp::getNumKernelOperands() {
   return getNumOperands() - kNumConfigOperands;
 }
@@ -492,6 +480,33 @@ static LogicalResult verify(LaunchFuncOp op) {
   return success();
 }
 
+static ParseResult
+parseLaunchFuncOperands(OpAsmParser &parser,
+                        SmallVectorImpl<OpAsmParser::OperandType> &argNames,
+                        SmallVectorImpl<Type> &argTypes) {
+  if (parser.parseOptionalKeyword("args"))
+    return success();
+  SmallVector<NamedAttrList, 4> argAttrs;
+  bool isVariadic = false;
+  return impl::parseFunctionArgumentList(parser, /*allowAttributes=*/false,
+                                         /*allowVariadic=*/false, argNames,
+                                         argTypes, argAttrs, isVariadic);
+}
+
+static void printLaunchFuncOperands(OpAsmPrinter &printer,
+                                    OperandRange operands, TypeRange types) {
+  if (operands.empty())
+    return;
+  printer << "args(";
+  llvm::interleaveComma(llvm::zip(operands, types), printer,
+                        [&](const auto &pair) {
+                          printer.printOperand(std::get<0>(pair));
+                          printer << " : ";
+                          printer.printType(std::get<1>(pair));
+                        });
+  printer << ")";
+}
+
 //===----------------------------------------------------------------------===//
 // GPUFuncOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/IR/FunctionImplementation.cpp b/mlir/lib/IR/FunctionImplementation.cpp
index 13aee344bbdc..56b2221fd44f 100644
--- a/mlir/lib/IR/FunctionImplementation.cpp
+++ b/mlir/lib/IR/FunctionImplementation.cpp
@@ -13,11 +13,11 @@
 
 using namespace mlir;
 
-static ParseResult
-parseArgumentList(OpAsmParser &parser, bool allowVariadic,
-                  SmallVectorImpl<Type> &argTypes,
-                  SmallVectorImpl<OpAsmParser::OperandType> &argNames,
-                  SmallVectorImpl<NamedAttrList> &argAttrs, bool &isVariadic) {
+ParseResult mlir::impl::parseFunctionArgumentList(
+    OpAsmParser &parser, bool allowAttributes, bool allowVariadic,
+    SmallVectorImpl<OpAsmParser::OperandType> &argNames,
+    SmallVectorImpl<Type> &argTypes, SmallVectorImpl<NamedAttrList> &argAttrs,
+    bool &isVariadic) {
   if (parser.parseLParen())
     return failure();
 
@@ -56,6 +56,8 @@ parseArgumentList(OpAsmParser &parser, bool allowVariadic,
     NamedAttrList attrs;
     if (parser.parseOptionalAttrDict(attrs))
       return failure();
+    if (!allowAttributes && !attrs.empty())
+      return parser.emitError(loc, "expected arguments without attributes");
     argAttrs.push_back(attrs);
     return success();
   };
@@ -129,8 +131,9 @@ ParseResult mlir::impl::parseFunctionSignature(
     SmallVectorImpl<Type> &argTypes, SmallVectorImpl<NamedAttrList> &argAttrs,
     bool &isVariadic, SmallVectorImpl<Type> &resultTypes,
     SmallVectorImpl<NamedAttrList> &resultAttrs) {
-  if (parseArgumentList(parser, allowVariadic, argTypes, argNames, argAttrs,
-                        isVariadic))
+  bool allowArgAttrs = true;
+  if (parseFunctionArgumentList(parser, allowArgAttrs, allowVariadic, argNames,
+                                argTypes, argAttrs, isVariadic))
     return failure();
   if (succeeded(parser.parseOptionalArrow()))
     return parseFunctionResultList(parser, resultTypes, resultAttrs);

diff  --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index cfdb06ac5702..3612b8e0dcc1 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -45,8 +45,7 @@ func @launch_func_too_few_operands(%sz : index) {
 
 func @launch_func_missing_parent_module_attribute(%sz : index) {
   // expected-error@+1 {{expected the closest surrounding module to have the 'gpu.container_module' attribute}}
-  "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
-      : (index, index, index, index, index, index) -> ()
+  gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
   return
 }
 
@@ -54,8 +53,8 @@ func @launch_func_missing_parent_module_attribute(%sz : index) {
 
 module attributes {gpu.container_module} {
   func @launch_func_missing_callee_attribute(%sz : index) {
-    // expected-error@+1 {{symbol reference attribute 'kernel' must be specified}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
+    // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}}
+    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
         : (index, index, index, index, index, index) -> ()
     return
   }
@@ -65,9 +64,8 @@ module attributes {gpu.container_module} {
 
 module attributes {gpu.container_module} {
   func @launch_func_no_function_attribute(%sz : index) {
-    // expected-error@+1 {{symbol reference attribute 'kernel' must be specified}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = 10}
-        : (index, index, index, index, index, index) -> ()
+    // expected-error@+1 {{custom op 'gpu.launch_func' invalid kind of attribute specified}}
+    gpu.launch_func "foo" blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -77,9 +75,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @launch_func_undefined_module(%sz : index) {
     // expected-error@+1 {{kernel module 'kernels' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = @kernels::@kernel_1 }
-        : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -103,9 +99,7 @@ module attributes {gpu.container_module} {
 
   func @launch_func_missing_module_attribute(%sz : index) {
     // expected-error@+1 {{kernel module 'kernels' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = @kernels::@kernel_1 }
-        : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -117,9 +111,7 @@ module attributes {gpu.container_module} {
 
   func @launch_func_undefined_function(%sz : index) {
     // expected-error@+1 {{kernel function '@kernels::@kernel_1' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = @kernels::@kernel_1 }
-        : (index, index, index, index, index, index) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
     return
   }
 }
@@ -135,9 +127,7 @@ module attributes {gpu.container_module} {
 
   func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<float>) {
     // expected-error@+1 {{kernel module 'kernels' is undefined}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-    {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, !llvm.ptr<float>) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<float>)
     return
   }
 }
@@ -153,9 +143,7 @@ module attributes {gpu.container_module} {
 
   func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm.ptr<float>) {
     // expected-error@+1 {{kernel function is missing the 'gpu.kernel' attribute}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-    {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, !llvm.ptr<float>) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<float>)
     return
   }
 }
@@ -171,10 +159,7 @@ module attributes {gpu.container_module} {
 
   func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm.ptr<float>) {
     // expected-error@+1 {{got 2 kernel operands but expected 1}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg)
-        {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, !llvm.ptr<float>,
-           !llvm.ptr<float>) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : !llvm.ptr<float>, %arg : !llvm.ptr<float>)
     return
   }
 }
@@ -190,9 +175,17 @@ module attributes {gpu.container_module} {
 
   func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
     // expected-err@+1 {{type of function argument 0 does not match}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-        {kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, f32) -> ()
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%arg : f32)
+    return
+  }
+}
+
+// -----
+
+module attributes {gpu.container_module} {
+  func @launch_func_kernel_operand_attr(%sz : index) {
+    // expected-error@+1 {{expected arguments without attributes}}
+    gpu.launch_func @foo::@bar blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz) args(%sz : index {foo})
     return
   }
 }

diff  --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 23cd6d5c7d0a..e81b233abfbc 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -63,7 +63,7 @@ module attributes {gpu.container_module} {
       gpu.return
     }
 
-    gpu.func @kernel_2(%arg0: f32, %arg1: memref<?xf32, 1>) kernel {
+    gpu.func @kernel_2() kernel {
       gpu.return
     }
   }
@@ -74,15 +74,11 @@ module attributes {gpu.container_module} {
     // CHECK: %{{.*}} = constant 8
     %cst = constant 8 : index
 
-    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_1} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
-    { kernel = @kernels::@kernel_1}
-        : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+    // CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args(%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
+    gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref<?xf32, 1>)
 
-    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_2} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
-    { kernel = @kernels::@kernel_2}
-        : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+    // CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})
+    gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
 
     return
   }

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 5fd8b6ce79cd..e2f16fe96a08 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -21,7 +21,7 @@ func @launch() {
   // CHECK: %[[BDIMZ:.*]] = constant 28
   %bDimZ = constant 28 : index
 
-  // CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], %[[ARG0]], %[[ARG1]]) {kernel = @launch_kernel::@launch_kernel} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+  // CHECK: gpu.launch_func @launch_kernel::@launch_kernel blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
   // CHECK-NOT: gpu.launch blocks
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
                                        %grid_z = %gDimZ)
@@ -64,14 +64,14 @@ func @launch() {
 func @multiple_launches() {
   // CHECK: %[[CST:.*]] = constant 8 : index
   %cst = constant 8 : index
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel::@multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
+  // CHECK: gpu.launch_func @multiple_launches_kernel::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
                                         %block_z = %cst) {
     gpu.terminator
   }
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel_0::@multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
+  // CHECK: gpu.launch_func @multiple_launches_kernel_0::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
   gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
                                           %grid_z2 = %cst)
              threads(%tx2, %ty2, %tz2) in (%block_x2 = %cst, %block_y2 = %cst,
@@ -95,7 +95,7 @@ func @extra_constants_not_inlined(%arg0: memref<?xf32>) {
   %cst2 = constant 2 : index
   %c0 = constant 0 : index
   %cst3 = "secret_constant"() : () -> index
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}, %{{.*}}) {kernel = @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel} : (index, index, index, index, index, index, memref<?xf32>, index) -> ()
+  // CHECK: gpu.launch_func @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args({{.*}} : memref<?xf32>, {{.*}} : index)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
@@ -119,7 +119,7 @@ func @extra_constants(%arg0: memref<?xf32>) {
   %cst2 = constant 2 : index
   %c0 = constant 0 : index
   %cst3 = dim %arg0, %c0 : memref<?xf32>
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]]) {kernel = @extra_constants_kernel::@extra_constants_kernel} : (index, index, index, index, index, index, memref<?xf32>) -> ()
+  // CHECK: gpu.launch_func @extra_constants_kernel::@extra_constants_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref<?xf32>)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
@@ -130,7 +130,7 @@ func @extra_constants(%arg0: memref<?xf32>) {
   return
 }
 
-// CHECK-LABEL: func @extra_constants_kernel
+// CHECK-LABEL: func @extra_constants_kernel(
 // CHECK-SAME: %[[KARG0:.*]]: memref<?xf32>
 // CHECK: constant 2
 // CHECK: constant 0
@@ -147,7 +147,7 @@ func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
   %c0 = constant 0 : index
   // CHECK: dim %[[ARG1]]
   %cst3 = dim %arg1, %c0 : memref<?xf32>
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[ARG0]], %{{.*}}) {kernel = @extra_constants_noarg_kernel::@extra_constants_noarg_kernel} : (index, index, index, index, index, index, memref<?xf32>, index) -> ()
+  // CHECK: gpu.launch_func @extra_constants_noarg_kernel::@extra_constants_noarg_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref<?xf32>, {{.*}} : index)
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
@@ -158,7 +158,7 @@ func @extra_constants_noarg(%arg0: memref<?xf32>, %arg1: memref<?xf32>) {
   return
 }
 
-// CHECK-LABEL: func @extra_constants_noarg_kernel
+// CHECK-LABEL: func @extra_constants_noarg_kernel(
 // CHECK-SAME: %[[KARG0:.*]]: memref<?xf32>, %[[KARG1:.*]]: index
 // CHECK: %[[KCST:.*]] = constant 2
 // CHECK: "use"(%[[KCST]], %[[KARG0]], %[[KARG1]])