[Mlir-commits] [mlir] 3303578 - [MLIR][GPU] Reject nested symbol references in gpu-kernel-outlining (#188994)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Fri Mar 27 07:00:15 PDT 2026


Author: Mehdi Amini
Date: 2026-03-27T15:00:09+01:00
New Revision: 33035782349d250d054eaee8172ad629b5bc1d66

URL: https://github.com/llvm/llvm-project/commit/33035782349d250d054eaee8172ad629b5bc1d66
DIFF: https://github.com/llvm/llvm-project/commit/33035782349d250d054eaee8172ad629b5bc1d66.diff

LOG: [MLIR][GPU] Reject nested symbol references in gpu-kernel-outlining (#188994)

Nested symbol references (e.g. `@module::@func`) inside a `gpu.launch`
body cannot be resolved after the body is outlined into a new
`gpu.module`. Previously, `createKernelModule` looked up each symbol use
only by its leaf name (`getLeafReference()`) and silently skipped a
nested reference when that leaf name could not be found in the parent
symbol table. This left unresolvable cross-module references in the
outlined kernel.

This patch detects nested symbol references whose root exists in the
parent symbol table — meaning the reference was valid before outlining
but will become dangling after it — and emits a diagnostic error.
Phantom references whose root does not exist in the parent are left
as-is, preserving existing behavior for unregistered-op attributes
(regression test from #185357).

The existing `@nested_launch` test was inadvertently testing this broken
behavior (silently producing invalid IR with a dangling
`@nested_launch_kernel::@nested_launch_kernel` reference inside the
outlined outer kernel module); it is updated to expect the new error.

Fixes #187942

Assisted-by: Claude Code

Added: 
    

Modified: 
    mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
    mlir/test/Dialect/GPU/outlining.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 0a2268d81b46d..b9529b0d067f2 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -375,8 +375,11 @@ class GpuKernelOutliningPass
         // Create nested module and insert outlinedFunc. The module will
         // originally get the same name as the function, but may be renamed on
         // insertion into the parent module.
-        auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
-        symbolTable.insert(kernelModule, insertPt);
+        FailureOr<gpu::GPUModuleOp> kernelModule =
+            createKernelModule(op, outlinedFunc, symbolTable);
+        if (failed(kernelModule))
+          return WalkResult::interrupt();
+        symbolTable.insert(*kernelModule, insertPt);
 
         // Potentially changes signature, pulling in constants.
         convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
@@ -396,9 +399,9 @@ class GpuKernelOutliningPass
 
 private:
   /// Returns a gpu.module containing kernelFunc and all callees (recursive).
-  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
-                                      gpu::GPUFuncOp kernelFunc,
-                                      const SymbolTable &parentSymbolTable) {
+  FailureOr<gpu::GPUModuleOp>
+  createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
+                     const SymbolTable &parentSymbolTable) {
     // TODO: This code cannot use an OpBuilder because it must be inserted into
     // a SymbolTable by the caller. SymbolTable needs to be refactored to
     // prevent manual building of Ops with symbols in code using SymbolTables
@@ -435,6 +438,23 @@ class GpuKernelOutliningPass
       if (std::optional<SymbolTable::UseRange> symbolUses =
               SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
         for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
+          // Nested symbol references (e.g. @M::@F) cannot be resolved inside
+          // the kernel module when @M exists in the parent: @M will not be
+          // available inside the outlined module after the transformation.
+          // Ignore references whose root does not exist in the parent, as those
+          // are phantom references (e.g. in unregistered-op attributes) that
+          // were already unresolvable and are simply copied as-is.
+          if (!symbolUse.getSymbolRef().getNestedReferences().empty() &&
+              parentSymbolTable.lookup(
+                  symbolUse.getSymbolRef().getRootReference())) {
+            symbolUse.getUser()->emitError("nested symbol reference '")
+                << symbolUse.getSymbolRef()
+                << "' cannot be resolved inside the outlined kernel module; "
+                   "gpu-kernel-outlining does not support cross-module symbol "
+                   "references inside gpu.launch bodies";
+            kernelModule->erase();
+            return failure();
+          }
           StringAttr symbolName = symbolUse.getSymbolRef().getLeafReference();
           if (symbolTable.lookup(symbolName))
             continue;

diff  --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index cf5b73349f23c..25220dff7a5bb 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
+// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file -verify-diagnostics %s | FileCheck --check-prefix CHECK-DL %s
 
 // CHECK: module attributes {gpu.container_module}
 
@@ -662,20 +662,15 @@ module attributes {gpu.container_module} {
 
 // -----
 
-// This test tests nested `gpu.launch`.
-
-// CHECK-LABEL: func.func @nested_launch(
-//  CHECK-SAME:                          %[[ARG0:.*]]: index) {
-//       CHECK:   gpu.launch_func  @nested_launch_kernel_0::@nested_launch_kernel blocks in (%[[ARG0]], %[[ARG0]], %[[ARG0]]) threads in (%[[ARG0]], %[[ARG0]], %[[ARG0]])  args(%[[ARG0]] : index)
-//       CHECK: gpu.module @nested_launch_kernel
-//       CHECK:   gpu.func @nested_launch_kernel() kernel
-//       CHECK:     "some_op"
-//       CHECK: gpu.module @nested_launch_kernel_0
-//       CHECK:   gpu.func @nested_launch_kernel(%[[VAL_0:.*]]: index) kernel
-//       CHECK:     gpu.launch_func  @nested_launch_kernel::@nested_launch_kernel blocks in (%[[VAL_0]], %[[VAL_0]], %[[VAL_0]]) threads in (%[[VAL_0]], %[[VAL_0]], %[[VAL_0]])
+// Nested gpu.launch ops are not supported: the inner gpu.launch is outlined
+// first, producing a gpu.launch_func with a nested symbol reference inside the
+// outer launch body. That nested reference cannot be resolved in the outlined
+// outer kernel module.
+
 func.func @nested_launch(%sz : index) {
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
              threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+    // expected-error @below {{nested symbol reference '@nested_launch_kernel::@nested_launch_kernel' cannot be resolved inside the outlined kernel module}}
     gpu.launch blocks(%bx1, %by1, %bz1) in (%grid_x1 = %sz, %grid_y1 = %sz, %grid_z1 = %sz)
                threads(%tx1, %ty1, %tz1) in (%block_x1 = %sz, %block_y1 = %sz, %block_z1 = %sz) {
       "some_op"(%bx1, %tx1) : (index, index) -> ()
@@ -685,3 +680,28 @@ func.func @nested_launch(%sz : index) {
   }
   return
 }
+
+// -----
+
+// Nested cross-module symbol references inside gpu.launch bodies are rejected.
+// (https://github.com/llvm/llvm-project/issues/187942)
+
+module attributes {gpu.container_module} {
+  gpu.module @km {
+    gpu.func @k() kernel {
+      gpu.return
+    }
+  }
+
+  func.func @cross_module_nested_ref(%sz: index) {
+    gpu.launch blocks(%bx, %by, %bz) in (%gx = %sz, %gy = %sz, %gz = %sz)
+               threads(%tx, %ty, %tz) in (%bxs = %sz, %bys = %sz, %bzs = %sz) {
+      // expected-error @below {{nested symbol reference '@km::@k' cannot be resolved inside the outlined kernel module}}
+      gpu.launch_func @km::@k
+        blocks in (%sz, %sz, %sz)
+        threads in (%sz, %sz, %sz)
+      gpu.terminator
+    }
+    return
+  }
+}


        


More information about the Mlir-commits mailing list