[Mlir-commits] [mlir] [MLIR][GPU] Reject nested symbol references in gpu-kernel-outlining (PR #188994)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Mar 27 06:32:22 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mlir-gpu

Author: Mehdi Amini (joker-eph)

<details>
<summary>Changes</summary>

Nested symbol references (e.g. `@module::@func`) inside a `gpu.launch` body cannot be resolved after the body is outlined into a new `gpu.module`. Previously, `createKernelModule` used `getLeafReference()` to look up each symbol use, which silently skipped nested references when the leaf name could not be found in the parent symbol table. This left unresolvable cross-module references in the outlined kernel.

This patch detects nested symbol references whose root exists in the parent symbol table — meaning the reference was valid before outlining but will become dangling after it — and emits a diagnostic error. Phantom references whose root does not exist in the parent are left as-is, preserving existing behavior for unregistered-op attributes (regression test from #185357).

The existing `@nested_launch` test was inadvertently testing this broken behavior (silently producing invalid IR with a dangling `@nested_launch_kernel::@nested_launch_kernel` reference inside the outlined outer kernel module); it is updated to expect the new error.

Fixes #187942

Assisted-by: Claude Code

---
Full diff: https://github.com/llvm/llvm-project/pull/188994.diff


2 Files Affected:

- (modified) mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp (+25-5) 
- (modified) mlir/test/Dialect/GPU/outlining.mlir (+32-12) 


``````````diff
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 0a2268d81b46d..b9529b0d067f2 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -375,8 +375,11 @@ class GpuKernelOutliningPass
         // Create nested module and insert outlinedFunc. The module will
         // originally get the same name as the function, but may be renamed on
         // insertion into the parent module.
-        auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
-        symbolTable.insert(kernelModule, insertPt);
+        FailureOr<gpu::GPUModuleOp> kernelModule =
+            createKernelModule(op, outlinedFunc, symbolTable);
+        if (failed(kernelModule))
+          return WalkResult::interrupt();
+        symbolTable.insert(*kernelModule, insertPt);
 
         // Potentially changes signature, pulling in constants.
         convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
@@ -396,9 +399,9 @@ class GpuKernelOutliningPass
 
 private:
   /// Returns a gpu.module containing kernelFunc and all callees (recursive).
-  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
-                                      gpu::GPUFuncOp kernelFunc,
-                                      const SymbolTable &parentSymbolTable) {
+  FailureOr<gpu::GPUModuleOp>
+  createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
+                     const SymbolTable &parentSymbolTable) {
     // TODO: This code cannot use an OpBuilder because it must be inserted into
     // a SymbolTable by the caller. SymbolTable needs to be refactored to
     // prevent manual building of Ops with symbols in code using SymbolTables
@@ -435,6 +438,23 @@ class GpuKernelOutliningPass
       if (std::optional<SymbolTable::UseRange> symbolUses =
               SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
         for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
+          // Nested symbol references (e.g. @M::@F) cannot be resolved inside
+          // the kernel module when @M exists in the parent: @M will not be
+          // available inside the outlined module after the transformation.
+          // Ignore references whose root does not exist in the parent, as those
+          // are phantom references (e.g. in unregistered-op attributes) that
+          // were already unresolvable and are simply copied as-is.
+          if (!symbolUse.getSymbolRef().getNestedReferences().empty() &&
+              parentSymbolTable.lookup(
+                  symbolUse.getSymbolRef().getRootReference())) {
+            symbolUse.getUser()->emitError("nested symbol reference '")
+                << symbolUse.getSymbolRef()
+                << "' cannot be resolved inside the outlined kernel module; "
+                   "gpu-kernel-outlining does not support cross-module symbol "
+                   "references inside gpu.launch bodies";
+            kernelModule->erase();
+            return failure();
+          }
           StringAttr symbolName = symbolUse.getSymbolRef().getLeafReference();
           if (symbolTable.lookup(symbolName))
             continue;
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index cf5b73349f23c..25220dff7a5bb 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
+// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file -verify-diagnostics %s | FileCheck --check-prefix CHECK-DL %s
 
 // CHECK: module attributes {gpu.container_module}
 
@@ -662,20 +662,15 @@ module attributes {gpu.container_module} {
 
 // -----
 
-// This test tests nested `gpu.launch`.
-
-// CHECK-LABEL: func.func @nested_launch(
-//  CHECK-SAME:                          %[[ARG0:.*]]: index) {
-//       CHECK:   gpu.launch_func  @nested_launch_kernel_0::@nested_launch_kernel blocks in (%[[ARG0]], %[[ARG0]], %[[ARG0]]) threads in (%[[ARG0]], %[[ARG0]], %[[ARG0]])  args(%[[ARG0]] : index)
-//       CHECK: gpu.module @nested_launch_kernel
-//       CHECK:   gpu.func @nested_launch_kernel() kernel
-//       CHECK:     "some_op"
-//       CHECK: gpu.module @nested_launch_kernel_0
-//       CHECK:   gpu.func @nested_launch_kernel(%[[VAL_0:.*]]: index) kernel
-//       CHECK:     gpu.launch_func  @nested_launch_kernel::@nested_launch_kernel blocks in (%[[VAL_0]], %[[VAL_0]], %[[VAL_0]]) threads in (%[[VAL_0]], %[[VAL_0]], %[[VAL_0]])
+// Nested gpu.launch ops are not supported: the inner gpu.launch is outlined
+// first, producing a gpu.launch_func with a nested symbol reference inside the
+// outer launch body. That nested reference cannot be resolved in the outlined
+// outer kernel module.
+
 func.func @nested_launch(%sz : index) {
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
              threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+    // expected-error @below {{nested symbol reference '@nested_launch_kernel::@nested_launch_kernel' cannot be resolved inside the outlined kernel module}}
     gpu.launch blocks(%bx1, %by1, %bz1) in (%grid_x1 = %sz, %grid_y1 = %sz, %grid_z1 = %sz)
                threads(%tx1, %ty1, %tz1) in (%block_x1 = %sz, %block_y1 = %sz, %block_z1 = %sz) {
       "some_op"(%bx1, %tx1) : (index, index) -> ()
@@ -685,3 +680,28 @@ func.func @nested_launch(%sz : index) {
   }
   return
 }
+
+// -----
+
+// Nested cross-module symbol references inside gpu.launch bodies are rejected.
+// (https://github.com/llvm/llvm-project/issues/187942)
+
+module attributes {gpu.container_module} {
+  gpu.module @km {
+    gpu.func @k() kernel {
+      gpu.return
+    }
+  }
+
+  func.func @cross_module_nested_ref(%sz: index) {
+    gpu.launch blocks(%bx, %by, %bz) in (%gx = %sz, %gy = %sz, %gz = %sz)
+               threads(%tx, %ty, %tz) in (%bxs = %sz, %bys = %sz, %bzs = %sz) {
+      // expected-error @below {{nested symbol reference '@km::@k' cannot be resolved inside the outlined kernel module}}
+      gpu.launch_func @km::@k
+        blocks in (%sz, %sz, %sz)
+        threads in (%sz, %sz, %sz)
+      gpu.terminator
+    }
+    return
+  }
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/188994


More information about the Mlir-commits mailing list