[Mlir-commits] [mlir] 0372db0 - [MLIR] Use nested symbol to identify kernel in `LaunchFuncOp`.

Wed Apr 22 00:45:05 PDT 2020

Author: Frederik Gossen
Date: 2020-04-22T07:44:29Z
New Revision: 0372db05bb1552c2b39fc735f949977e0a863a25

URL: https://github.com/llvm/llvm-project/commit/0372db05bb1552c2b39fc735f949977e0a863a25
DIFF: https://github.com/llvm/llvm-project/commit/0372db05bb1552c2b39fc735f949977e0a863a25.diff

LOG: [MLIR] Use nested symbol to identify kernel in `LaunchFuncOp`.

Summary:
Use a nested symbol to identify the kernel to be invoked by a `LaunchFuncOp` in the GPU dialect.
This replaces the two attributes that were used to identify the kernel module and the kernel within seperately.

Differential Revision: https://reviews.llvm.org/D78551

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/GPUOps.td
    mlir/include/mlir/IR/SymbolTable.h
    mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
    mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
    mlir/test/Conversion/GPUToSPIRV/builtins.mlir
    mlir/test/Conversion/GPUToSPIRV/if.mlir
    mlir/test/Conversion/GPUToSPIRV/load-store.mlir
    mlir/test/Conversion/GPUToSPIRV/loop.mlir
    mlir/test/Conversion/GPUToSPIRV/simple.mlir
    mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
    mlir/test/Dialect/GPU/invalid.mlir
    mlir/test/Dialect/GPU/ops.mlir
    mlir/test/Dialect/GPU/outlining.mlir
    mlir/test/mlir-vulkan-runner/addf.mlir
    mlir/test/mlir-vulkan-runner/mulf.mlir
    mlir/test/mlir-vulkan-runner/subf.mlir
    mlir/test/mlir-vulkan-runner/time.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 5d91ff6ac545..342b36badd30 100644

--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -334,15 +334,17 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
 
   let extraClassDeclaration = [{
     /// The kernel function specified by the operation's `kernel` attribute.
-    StringRef kernel();
+    SymbolRefAttr kernel();
 
     /// The number of operands passed to the kernel function.
     unsigned getNumKernelOperands();
 
-    /// The name of the kernel module specified by the operation's
-    /// `kernel_module` attribute.
+    /// The name of the kernel's containing module.
     StringRef getKernelModuleName();
 
+    /// The name of the kernel.
+    StringRef getKernelName();
+
     /// The i-th operand passed to the kernel function.
     Value getKernelOperand(unsigned i);
 
@@ -361,12 +363,8 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
     friend LogicalResult GPUDialect::verifyOperationAttribute(Operation *,
                                                               NamedAttribute);
 
-    /// The name of the symbolRef attribute specifying the kernel to launch.
+    /// The name of the symbol reference attribute specifying the kernel to launch.
     static StringRef getKernelAttrName() { return "kernel"; }
-
-    /// The name of the symbolRef attribute specifying the name of the module
-    /// containing the kernel to launch.
-    static StringRef getKernelModuleAttrName() { return "kernel_module"; }
   }];
 
   let verifier = [{ return ::verify(*this); }];

diff  --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h
index 6f5c07ef3a41..c61efb066e39 100644
--- a/mlir/include/mlir/IR/SymbolTable.h
+++ b/mlir/include/mlir/IR/SymbolTable.h
@@ -9,6 +9,7 @@
 #ifndef MLIR_IR_SYMBOLTABLE_H
 #define MLIR_IR_SYMBOLTABLE_H
 
+#include "mlir/IR/Attributes.h"
 #include "mlir/IR/OpDefinition.h"
 #include "llvm/ADT/StringMap.h"
 
@@ -106,6 +107,14 @@ class SymbolTable {
   static Operation *lookupNearestSymbolFrom(Operation *from, StringRef symbol);
   static Operation *lookupNearestSymbolFrom(Operation *from,
                                             SymbolRefAttr symbol);
+  template <typename T>
+  static T lookupNearestSymbolFrom(Operation *from, StringRef symbol) {
+    return dyn_cast_or_null<T>(lookupNearestSymbolFrom(from, symbol));
+  }
+  template <typename T>
+  static T lookupNearestSymbolFrom(Operation *from, SymbolRefAttr symbol) {
+    return dyn_cast_or_null<T>(lookupNearestSymbolFrom(from, symbol));
+  }
 
   /// This class represents a specific symbol use.
   class SymbolUse {
@@ -227,6 +236,13 @@ class SymbolTable : public TraitBase<ConcreteType, SymbolTable> {
   template <typename T> T lookupSymbol(StringRef name) {
     return dyn_cast_or_null<T>(lookupSymbol(name));
   }
+  Operation *lookupSymbol(SymbolRefAttr symbol) {
+    return mlir::SymbolTable::lookupSymbolIn(this->getOperation(), symbol);
+  }
+  template <typename T>
+  T lookupSymbol(SymbolRefAttr symbol) {
+    return dyn_cast_or_null<T>(lookupSymbol(symbol));
+  }
 };
 
 /// A trait used to define a symbol that can be used on operations within a

diff  --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
index bdd9bb66f617..cfdcb0f98ade 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
@@ -273,14 +273,8 @@ Value GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp,
                                                      OpBuilder &builder) {
 
   // Get the launch target.
-  auto containingModule = launchOp.getParentOfType<ModuleOp>();
-  if (!containingModule)
-    return {};
-  auto gpuModule = containingModule.lookupSymbol<gpu::GPUModuleOp>(
-      launchOp.getKernelModuleName());
-  if (!gpuModule)
-    return {};
-  auto gpuFunc = gpuModule.lookupSymbol<LLVM::LLVMFuncOp>(launchOp.kernel());
+  auto gpuFunc = SymbolTable::lookupNearestSymbolFrom<LLVM::LLVMFuncOp>(
+      launchOp, launchOp.kernel());
   if (!gpuFunc)
     return {};
 
@@ -416,8 +410,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
   // the kernel function.
   auto cuOwningModuleRef =
       builder.create<LLVM::LoadOp>(loc, getPointerType(), cuModule);
-  auto kernelName = generateKernelNameConstant(launchOp.getKernelModuleName(),
-                                               launchOp.kernel(), loc, builder);
+  auto kernelName = generateKernelNameConstant(
+      launchOp.getKernelModuleName(), launchOp.getKernelName(), loc, builder);
   auto cuFunction = allocatePointer(builder, loc);
   auto cuModuleGetFunction =
       getOperation().lookupSymbol<LLVM::LLVMFuncOp>(cuModuleGetFunctionName);

diff  --git a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
index b33edb92605c..26588049b939 100644
--- a/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
+++ b/mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
@@ -182,7 +182,7 @@ void ConvertGpuLaunchFuncToVulkanLaunchFunc::convertGpuLaunchFunc(
   // Set entry point name as an attribute.
   vulkanLaunchCallOp.setAttr(
       kSPIRVEntryPointAttrName,
-      StringAttr::get(launchOp.kernel(), loc->getContext()));
+      StringAttr::get(launchOp.getKernelName(), loc->getContext()));
 
   launchOp.erase();
 }

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 1474be76c57e..e751107820bc 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -11,8 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/GPU/GPUDialect.h"
+
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/FunctionImplementation.h"
@@ -62,10 +64,8 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
 
     // Ignore launch ops with missing attributes here. The errors will be
     // reported by the verifiers of those ops.
-    if (!launchOp.getAttrOfType<StringAttr>(
-            LaunchFuncOp::getKernelAttrName()) ||
-        !launchOp.getAttrOfType<SymbolRefAttr>(
-            LaunchFuncOp::getKernelModuleAttrName()))
+    if (!launchOp.getAttrOfType<SymbolRefAttr>(
+            LaunchFuncOp::getKernelAttrName()))
       return success();
 
     // Check that `launch_func` refers to a well-formed GPU kernel module.
@@ -76,13 +76,12 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
              << "kernel module '" << kernelModuleName << "' is undefined";
 
     // Check that `launch_func` refers to a well-formed kernel function.
-    StringRef kernelName = launchOp.kernel();
-    Operation *kernelFunc = kernelModule.lookupSymbol(kernelName);
+    Operation *kernelFunc = module.lookupSymbol(launchOp.kernel());
     auto kernelGPUFunction = dyn_cast_or_null<gpu::GPUFuncOp>(kernelFunc);
     auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
     if (!kernelGPUFunction && !kernelLLVMFunction)
       return launchOp.emitOpError("kernel function '")
-             << kernelName << "' is undefined";
+             << launchOp.kernel() << "' is undefined";
     if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
             GPUDialect::getKernelFuncAttrName()))
       return launchOp.emitOpError("kernel function is missing the '")
@@ -397,11 +396,11 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
   result.addOperands(
       {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
   result.addOperands(kernelOperands);
-  result.addAttribute(getKernelAttrName(),
-                      builder->getStringAttr(kernelFunc.getName()));
   auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();
-  result.addAttribute(getKernelModuleAttrName(),
-                      builder->getSymbolRefAttr(kernelModule.getName()));
+  auto kernelSymbol = builder->getSymbolRefAttr(
+      kernelModule.getName(),
+      {builder->getSymbolRefAttr(kernelFunc.getName())});
+  result.addAttribute(getKernelAttrName(), kernelSymbol);
 }
 
 void LaunchFuncOp::build(Builder *builder, OperationState &result,
@@ -411,8 +410,8 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
         blockSize.x, blockSize.y, blockSize.z, kernelOperands);
 }
 
-StringRef LaunchFuncOp::kernel() {
-  return getAttrOfType<StringAttr>(getKernelAttrName()).getValue();
+SymbolRefAttr LaunchFuncOp::kernel() {
+  return getAttrOfType<SymbolRefAttr>(getKernelAttrName());
 }
 
 unsigned LaunchFuncOp::getNumKernelOperands() {
@@ -420,10 +419,11 @@ unsigned LaunchFuncOp::getNumKernelOperands() {
 }
 
 StringRef LaunchFuncOp::getKernelModuleName() {
-  return getAttrOfType<SymbolRefAttr>(getKernelModuleAttrName())
-      .getRootReference();
+  return kernel().getRootReference();
 }
 
+StringRef LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); }
+
 Value LaunchFuncOp::getKernelOperand(unsigned i) {
   return getOperation()->getOperand(i + kNumConfigOperands);
 }
@@ -446,16 +446,10 @@ static LogicalResult verify(LaunchFuncOp op) {
         "expected the closest surrounding module to have the '" +
         GPUDialect::getContainerModuleAttrName() + "' attribute");
 
-  auto kernelAttr = op.getAttrOfType<StringAttr>(op.getKernelAttrName());
+  auto kernelAttr = op.getAttrOfType<SymbolRefAttr>(op.getKernelAttrName());
   if (!kernelAttr)
-    return op.emitOpError("string attribute '" + op.getKernelAttrName() +
-                          "' must be specified");
-
-  auto kernelModuleAttr =
-      op.getAttrOfType<SymbolRefAttr>(op.getKernelModuleAttrName());
-  if (!kernelModuleAttr)
     return op.emitOpError("symbol reference attribute '" +
-                          op.getKernelModuleAttrName() + "' must be specified");
+                          op.getKernelAttrName() + "' must be specified");
 
   return success();
 }

diff  --git a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
index 17244ce9f066..20b76a2e3a29 100644
--- a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
@@ -27,7 +27,7 @@ module attributes {gpu.container_module} {
     // CHECK: llvm.call @mcuGetStreamHelper
     // CHECK: llvm.call @mcuLaunchKernel
     // CHECK: llvm.call @mcuStreamSynchronize
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "kernel", kernel_module = @kernel_module }
+    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernel_module::@kernel }
         : (!llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.i64, !llvm.float, !llvm<"float*">) -> ()
 
     llvm.return

diff  --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
index 2a73884c8696..84afa22ecae3 100644
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -3,7 +3,7 @@
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_id_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_id_x} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -26,7 +26,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_id_y", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_id_y} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -49,7 +49,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_id_z", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_id_z} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -72,7 +72,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_size_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_size_x} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -96,7 +96,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_size_y", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_size_y} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -117,7 +117,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_workgroup_size_z", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_workgroup_size_z} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -138,7 +138,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_local_id_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_local_id_x} : (index, index, index, index, index, index) -> ()
     return
   }
 
@@ -161,7 +161,7 @@ module attributes {gpu.container_module} {
 module attributes {gpu.container_module} {
   func @builtin() {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = "builtin_num_workgroups_x", kernel_module = @kernels} : (index, index, index, index, index, index) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0) {kernel = @kernels::@builtin_num_workgroups_x} : (index, index, index, index, index, index) -> ()
     return
   }
 

diff  --git a/mlir/test/Conversion/GPUToSPIRV/if.mlir b/mlir/test/Conversion/GPUToSPIRV/if.mlir
index 3fefc04fad1a..8a637457884e 100644
--- a/mlir/test/Conversion/GPUToSPIRV/if.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/if.mlir
@@ -9,7 +9,7 @@ module attributes {
 } {
   func @main(%arg0 : memref<10xf32>, %arg1 : i1) {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0, %arg0, %arg1) { kernel = "kernel_simple_selection", kernel_module = @kernels} : (index, index, index, index, index, index, memref<10xf32>, i1) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0, %arg0, %arg1) { kernel = @kernels::@kernel_simple_selection} : (index, index, index, index, index, index, memref<10xf32>, i1) -> ()
     return
   }
 

diff  --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
index acb18e7b16e1..077a1c0c7879 100644
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -17,7 +17,7 @@ module attributes {
     %1 = subi %c4, %c0_0 : index
     %c1_1 = constant 1 : index
     %c1_2 = constant 1 : index
-    "gpu.launch_func"(%0, %c1_2, %c1_2, %1, %c1_2, %c1_2, %arg0, %arg1, %arg2, %c0, %c0_0, %c1, %c1_1) {kernel = "load_store_kernel", kernel_module = @kernels} : (index, index, index, index, index, index, memref<12x4xf32>, memref<12x4xf32>, memref<12x4xf32>, index, index, index, index) -> ()
+    "gpu.launch_func"(%0, %c1_2, %c1_2, %1, %c1_2, %c1_2, %arg0, %arg1, %arg2, %c0, %c0_0, %c1, %c1_1) {kernel = @kernels::@load_store_kernel} : (index, index, index, index, index, index, memref<12x4xf32>, memref<12x4xf32>, memref<12x4xf32>, index, index, index, index) -> ()
     return
   }
 

diff  --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
index 6f0b209c8ea0..56bff8a3985b 100644
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -9,7 +9,7 @@ module attributes {
 } {
   func @loop(%arg0 : memref<10xf32>, %arg1 : memref<10xf32>) {
     %c0 = constant 1 : index
-    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0, %arg0, %arg1) { kernel = "loop_kernel", kernel_module = @kernels} : (index, index, index, index, index, index, memref<10xf32>, memref<10xf32>) -> ()
+    "gpu.launch_func"(%c0, %c0, %c0, %c0, %c0, %c0, %arg0, %arg1) { kernel = @kernels::@loop_kernel} : (index, index, index, index, index, index, memref<10xf32>, memref<10xf32>) -> ()
     return
   }
 

diff  --git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
index c657d5f68fab..f68823321568 100644
--- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
@@ -18,7 +18,7 @@ module attributes {gpu.container_module} {
     %0 = "op"() : () -> (f32)
     %1 = "op"() : () -> (memref<12xf32>)
     %cst = constant 1 : index
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "basic_module_structure", kernel_module = @kernels }
+    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernels::@basic_module_structure }
         : (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
     return
   }
@@ -39,7 +39,7 @@ module attributes {gpu.container_module} {
     %0 = "op"() : () -> (f32)
     %1 = "op"() : () -> (memref<12xf32>)
     %cst = constant 1 : index
-    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = "missing_entry_point_abi", kernel_module = @kernels }
+    "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1) { kernel = @kernels::@missing_entry_point_abi }
         : (index, index, index, index, index, index, f32, memref<12xf32>) -> ()
     return
   }

diff  --git a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
index 30dc3a478036..726b276010ef 100644
--- a/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
+++ b/mlir/test/Conversion/GPUToVulkan/lower-gpu-launch-vulkan-launch.mlir
@@ -26,7 +26,7 @@ module attributes {gpu.container_module} {
   func @foo() {
     %0 = alloc() : memref<12xf32>
     %c1 = constant 1 : index
-    "gpu.launch_func"(%c1, %c1, %c1, %c1, %c1, %c1, %0) {kernel = "kernel", kernel_module = @kernels} : (index, index, index, index, index, index, memref<12xf32>) -> ()
+    "gpu.launch_func"(%c1, %c1, %c1, %c1, %c1, %c1, %0) {kernel = @kernels::@kernel} : (index, index, index, index, index, index, memref<12xf32>) -> ()
     return
   }
 }

diff  --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 885ad3273d63..be02dec83913 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -54,7 +54,7 @@ func @launch_func_missing_parent_module_attribute(%sz : index) {
 
 module attributes {gpu.container_module} {
   func @launch_func_missing_callee_attribute(%sz : index) {
-    // expected-error at +1 {{string attribute 'kernel' must be specified}}
+    // expected-error at +1 {{symbol reference attribute 'kernel' must be specified}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {foo = "bar"}
         : (index, index, index, index, index, index) -> ()
     return
@@ -63,20 +63,9 @@ module attributes {gpu.container_module} {
 
 // -----
 
-module attributes {gpu.container_module} {
-  func @launch_func_missing_module_attribute(%sz : index) {
-    // expected-error at +1 {{attribute 'kernel_module' must be specified}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = "launch_func_missing_kernel_attr"}
-        : (index, index, index, index, index, index) -> ()
-    return
-  }
-}
-
-// -----
-
 module attributes {gpu.container_module} {
   func @launch_func_no_function_attribute(%sz : index) {
-    // expected-error at +1 {{string attribute 'kernel' must be specified}}
+    // expected-error at +1 {{symbol reference attribute 'kernel' must be specified}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) {kernel = 10}
         : (index, index, index, index, index, index) -> ()
     return
@@ -85,23 +74,11 @@ module attributes {gpu.container_module} {
 
 // -----
 
-module attributes {gpu.container_module} {
-  func @launch_func_module_attribute_wrong_type(%sz : index) {
-    // expected-error at +1 {{symbol reference attribute 'kernel_module' must be specified}}
-    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    {kernel = "launch_func_module_attribute_wrong_type", kernel_module = 10}
-        : (index, index, index, index, index, index) -> ()
-    return
-  }
-}
-
-// -----
-
 module attributes {gpu.container_module} {
   func @launch_func_undefined_module(%sz : index) {
     // expected-error at +1 {{kernel module 'kernels' is undefined}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = "kernel_1", kernel_module = @kernels }
+    { kernel = @kernels::@kernel_1 }
         : (index, index, index, index, index, index) -> ()
     return
   }
@@ -116,7 +93,7 @@ module attributes {gpu.container_module} {
   func @launch_func_missing_module_attribute(%sz : index) {
     // expected-error at +1 {{kernel module 'kernels' is undefined}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = "kernel_1", kernel_module = @kernels }
+    { kernel = @kernels::@kernel_1 }
         : (index, index, index, index, index, index) -> ()
     return
   }
@@ -128,9 +105,9 @@ module attributes {gpu.container_module} {
   gpu.module @kernels { }
 
   func @launch_func_undefined_function(%sz : index) {
-    // expected-error at +1 {{kernel function 'kernel_1' is undefined}}
+    // expected-error at +1 {{kernel function '@kernels::@kernel_1' is undefined}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
-    { kernel = "kernel_1", kernel_module = @kernels }
+    { kernel = @kernels::@kernel_1 }
         : (index, index, index, index, index, index) -> ()
     return
   }
@@ -138,6 +115,24 @@ module attributes {gpu.container_module} {
 
 // -----
 
+module attributes {gpu.container_module} {
+  module @kernels {
+    gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel {
+      gpu.return
+    }
+  }
+
+  func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) {
+    // expected-error at +1 {{kernel module 'kernels' is undefined}}
+    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
+    {kernel = @kernels::@kernel_1}
+        : (index, index, index, index, index, index, !llvm<"float*">) -> ()
+    return
+  }
+}
+
+// -----
+
 module attributes {gpu.container_module} {
   gpu.module @kernels {
     gpu.func @kernel_1(%arg1 : !llvm<"float*">) {
@@ -148,7 +143,7 @@ module attributes {gpu.container_module} {
   func @launch_func_missing_kernel_attr(%sz : index, %arg : !llvm<"float*">) {
     // expected-error at +1 {{kernel function is missing the 'gpu.kernel' attribute}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-    {kernel = "kernel_1", kernel_module = @kernels}
+    {kernel = @kernels::@kernel_1}
         : (index, index, index, index, index, index, !llvm<"float*">) -> ()
     return
   }
@@ -166,7 +161,7 @@ module attributes {gpu.container_module} {
   func @launch_func_kernel_operand_size(%sz : index, %arg : !llvm<"float*">) {
     // expected-error at +1 {{got 2 kernel operands but expected 1}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg, %arg)
-        {kernel = "kernel_1", kernel_module = @kernels}
+        {kernel = @kernels::@kernel_1}
         : (index, index, index, index, index, index, !llvm<"float*">,
            !llvm<"float*">) -> ()
     return
@@ -185,7 +180,7 @@ module attributes {gpu.container_module} {
   func @launch_func_kernel_operand_types(%sz : index, %arg : f32) {
     // expected-err at +1 {{type of function argument 0 does not match}}
     "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz, %arg)
-        {kernel = "kernel_1", kernel_module = @kernels}
+        {kernel = @kernels::@kernel_1}
         : (index, index, index, index, index, index, f32) -> ()
     return
   }

diff  --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 1cb1b53e077c..f500d7173f71 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -70,14 +70,14 @@ module attributes {gpu.container_module} {
     // CHECK: %{{.*}} = constant 8
     %cst = constant 8 : index
 
-    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = "kernel_1", kernel_module = @kernels} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_1} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
     "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
-    { kernel = "kernel_1", kernel_module = @kernels }
+    { kernel = @kernels::@kernel_1}
         : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
 
-    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = "kernel_2", kernel_module = @kernels} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+    // CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {kernel = @kernels::@kernel_2} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
     "gpu.launch_func"(%cst, %cst, %cst, %cst, %cst, %cst, %0, %1)
-    { kernel = "kernel_2", kernel_module = @kernels }
+    { kernel = @kernels::@kernel_2}
         : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
 
     return

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 0d8734716a47..d15f10fd75ec 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -21,7 +21,7 @@ func @launch() {
   // CHECK: %[[BDIMZ:.*]] = constant 28
   %bDimZ = constant 28 : index
 
-  // CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], %[[ARG0]], %[[ARG1]]) {kernel = "launch_kernel", kernel_module = @launch_kernel} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
+  // CHECK: "gpu.launch_func"(%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]], %[[BDIMX]], %[[BDIMY]], %[[BDIMZ]], %[[ARG0]], %[[ARG1]]) {kernel = @launch_kernel::@launch_kernel} : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> ()
   // CHECK-NOT: gpu.launch blocks
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
                                        %grid_z = %gDimZ)
@@ -64,14 +64,14 @@ func @launch() {
 func @multiple_launches() {
   // CHECK: %[[CST:.*]] = constant 8 : index
   %cst = constant 8 : index
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = "multiple_launches_kernel", kernel_module = @multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
+  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel::@multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,
                                         %block_z = %cst) {
     gpu.terminator
   }
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = "multiple_launches_kernel", kernel_module = @multiple_launches_kernel_0} : (index, index, index, index, index, index) -> ()
+  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]]) {kernel = @multiple_launches_kernel_0::@multiple_launches_kernel} : (index, index, index, index, index, index) -> ()
   gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
                                           %grid_z2 = %cst)
              threads(%tx2, %ty2, %tz2) in (%block_x2 = %cst, %block_y2 = %cst,
@@ -93,7 +93,7 @@ func @extra_constants(%arg0 : memref<?xf32>) {
   %cst = constant 8 : index
   %cst2 = constant 2 : index
   %cst3 = dim %arg0, 0 : memref<?xf32>
-  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}) {kernel = "extra_constants_kernel", kernel_module = @extra_constants_kernel} : (index, index, index, index, index, index, memref<?xf32>) -> ()
+  // CHECK: "gpu.launch_func"(%[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %[[CST]], %{{.*}}) {kernel = @extra_constants_kernel::@extra_constants_kernel} : (index, index, index, index, index, index, memref<?xf32>) -> ()
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
                                        %grid_z = %cst)
              threads(%tx, %ty, %tz) in (%block_x = %cst, %block_y = %cst,

diff  --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir
index 2fb3a94a190b..3ba86ef89119 100644
--- a/mlir/test/mlir-vulkan-runner/addf.mlir
+++ b/mlir/test/mlir-vulkan-runner/addf.mlir
@@ -39,7 +39,7 @@ module attributes {
 
     %cst1 = constant 1 : index
     %cst8 = constant 8 : index
-    "gpu.launch_func"(%cst8, %cst1, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = "kernel_add", kernel_module = @kernels }
+    gpu.launch_func"(%cst8, %cst1, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_add }
         : (index, index, index, index, index, index, memref<8xf32>, memref<8xf32>, memref<8xf32>) -> ()
     %arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
     call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()

diff  --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/mlir-vulkan-runner/mulf.mlir
index 0da888b6876c..89175e803d98 100644
--- a/mlir/test/mlir-vulkan-runner/mulf.mlir
+++ b/mlir/test/mlir-vulkan-runner/mulf.mlir
@@ -40,7 +40,7 @@ module attributes {
 
     %cst1 = constant 1 : index
     %cst4 = constant 4 : index
-    "gpu.launch_func"(%cst4, %cst4, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = "kernel_mul", kernel_module = @kernels }
+    "gpu.launch_func"(%cst4, %cst4, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_mul }
         : (index, index, index, index, index, index, memref<4x4xf32>, memref<4x4xf32>, memref<4x4xf32>) -> ()
     %arg6 = memref_cast %arg5 : memref<?x?xf32> to memref<*xf32>
     call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()

diff  --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/mlir-vulkan-runner/subf.mlir
index c77a14b2ccf5..b41094653d97 100644
--- a/mlir/test/mlir-vulkan-runner/subf.mlir
+++ b/mlir/test/mlir-vulkan-runner/subf.mlir
@@ -42,7 +42,7 @@ module attributes {
     %cst1 = constant 1 : index
     %cst4 = constant 4 : index
     %cst8 = constant 8 : index
-    "gpu.launch_func"(%cst8, %cst4, %cst4, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = "kernel_sub", kernel_module = @kernels }
+    "gpu.launch_func"(%cst8, %cst4, %cst4, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_sub }
         : (index, index, index, index, index, index, memref<8x4x4xf32>, memref<4x4xf32>, memref<8x4x4xf32>) -> ()
     %arg6 = memref_cast %arg5 : memref<?x?x?xf32> to memref<*xf32>
     call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()

diff  --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir
index b95452e19f96..ffa8985d488f 100644
--- a/mlir/test/mlir-vulkan-runner/time.mlir
+++ b/mlir/test/mlir-vulkan-runner/time.mlir
@@ -46,7 +46,7 @@ module attributes {
 
     %cst1 = constant 1 : index
     %cst128 = constant 128 : index
-    "gpu.launch_func"(%cst128, %cst1, %cst1, %cst128, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = "kernel_add", kernel_module = @kernels }
+    "gpu.launch_func"(%cst128, %cst1, %cst1, %cst128, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_add }
         : (index, index, index, index, index, index, memref<16384xf32>, memref<16384xf32>, memref<16384xf32>) -> ()
     %arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
     return