[Mlir-commits] [mlir] [MLIR][GPU] Reject nested symbol references in gpu-kernel-outlining (PR #188994)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Mar 27 06:32:22 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-gpu
Author: Mehdi Amini (joker-eph)
<details>
<summary>Changes</summary>
Nested symbol references (e.g. `@module::@func`) inside a `gpu.launch` body cannot be resolved after the body is outlined into a new `gpu.module`. Previously, `createKernelModule` used `getLeafReference()` to look up each symbol use, which silently skipped nested references when the leaf name could not be found in the parent symbol table. This left unresolvable cross-module references in the outlined kernel.
This patch detects nested symbol references whose root exists in the parent symbol table — meaning the reference was valid before outlining but will become dangling after it — and emits a diagnostic error. Phantom references whose root does not exist in the parent are left as-is, preserving existing behavior for unregistered-op attributes (regression test from #185357).
The existing `@nested_launch` test was inadvertently testing this broken behavior (silently producing invalid IR with a dangling `@nested_launch_kernel::@nested_launch_kernel` reference inside the outlined outer kernel module); it is updated to expect the new error.
Fixes #187942
Assisted-by: Claude Code
---
Full diff: https://github.com/llvm/llvm-project/pull/188994.diff
2 Files Affected:
- (modified) mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp (+25-5)
- (modified) mlir/test/Dialect/GPU/outlining.mlir (+32-12)
``````````diff
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 0a2268d81b46d..b9529b0d067f2 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -375,8 +375,11 @@ class GpuKernelOutliningPass
// Create nested module and insert outlinedFunc. The module will
// originally get the same name as the function, but may be renamed on
// insertion into the parent module.
- auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
- symbolTable.insert(kernelModule, insertPt);
+ FailureOr<gpu::GPUModuleOp> kernelModule =
+ createKernelModule(op, outlinedFunc, symbolTable);
+ if (failed(kernelModule))
+ return WalkResult::interrupt();
+ symbolTable.insert(*kernelModule, insertPt);
// Potentially changes signature, pulling in constants.
convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
@@ -396,9 +399,9 @@ class GpuKernelOutliningPass
private:
/// Returns a gpu.module containing kernelFunc and all callees (recursive).
- gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
- gpu::GPUFuncOp kernelFunc,
- const SymbolTable &parentSymbolTable) {
+ FailureOr<gpu::GPUModuleOp>
+ createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
+ const SymbolTable &parentSymbolTable) {
// TODO: This code cannot use an OpBuilder because it must be inserted into
// a SymbolTable by the caller. SymbolTable needs to be refactored to
// prevent manual building of Ops with symbols in code using SymbolTables
@@ -435,6 +438,23 @@ class GpuKernelOutliningPass
if (std::optional<SymbolTable::UseRange> symbolUses =
SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
+ // Nested symbol references (e.g. @M::@F) cannot be resolved inside
+ // the kernel module when @M exists in the parent: @M will not be
+ // available inside the outlined module after the transformation.
+ // Ignore references whose root does not exist in the parent, as those
+ // are phantom references (e.g. in unregistered-op attributes) that
+ // were already unresolvable and are simply copied as-is.
+ if (!symbolUse.getSymbolRef().getNestedReferences().empty() &&
+ parentSymbolTable.lookup(
+ symbolUse.getSymbolRef().getRootReference())) {
+ symbolUse.getUser()->emitError("nested symbol reference '")
+ << symbolUse.getSymbolRef()
+ << "' cannot be resolved inside the outlined kernel module; "
+ "gpu-kernel-outlining does not support cross-module symbol "
+ "references inside gpu.launch bodies";
+ kernelModule->erase();
+ return failure();
+ }
StringAttr symbolName = symbolUse.getSymbolRef().getLeafReference();
if (symbolTable.lookup(symbolName))
continue;
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index cf5b73349f23c..25220dff7a5bb 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -1,5 +1,5 @@
// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
+// RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file -verify-diagnostics %s | FileCheck --check-prefix CHECK-DL %s
// CHECK: module attributes {gpu.container_module}
@@ -662,20 +662,15 @@ module attributes {gpu.container_module} {
// -----
-// This test tests nested `gpu.launch`.
-
-// CHECK-LABEL: func.func @nested_launch(
-// CHECK-SAME: %[[ARG0:.*]]: index) {
-// CHECK: gpu.launch_func @nested_launch_kernel_0::@nested_launch_kernel blocks in (%[[ARG0]], %[[ARG0]], %[[ARG0]]) threads in (%[[ARG0]], %[[ARG0]], %[[ARG0]]) args(%[[ARG0]] : index)
-// CHECK: gpu.module @nested_launch_kernel
-// CHECK: gpu.func @nested_launch_kernel() kernel
-// CHECK: "some_op"
-// CHECK: gpu.module @nested_launch_kernel_0
-// CHECK: gpu.func @nested_launch_kernel(%[[VAL_0:.*]]: index) kernel
-// CHECK: gpu.launch_func @nested_launch_kernel::@nested_launch_kernel blocks in (%[[VAL_0]], %[[VAL_0]], %[[VAL_0]]) threads in (%[[VAL_0]], %[[VAL_0]], %[[VAL_0]])
+// Nested gpu.launch ops are not supported: the inner gpu.launch is outlined
+// first, producing a gpu.launch_func with a nested symbol reference inside the
+// outer launch body. That nested reference cannot be resolved in the outlined
+// outer kernel module.
+
func.func @nested_launch(%sz : index) {
gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
+ // expected-error @below {{nested symbol reference '@nested_launch_kernel::@nested_launch_kernel' cannot be resolved inside the outlined kernel module}}
gpu.launch blocks(%bx1, %by1, %bz1) in (%grid_x1 = %sz, %grid_y1 = %sz, %grid_z1 = %sz)
threads(%tx1, %ty1, %tz1) in (%block_x1 = %sz, %block_y1 = %sz, %block_z1 = %sz) {
"some_op"(%bx1, %tx1) : (index, index) -> ()
@@ -685,3 +680,28 @@ func.func @nested_launch(%sz : index) {
}
return
}
+
+// -----
+
+// Nested cross-module symbol references inside gpu.launch bodies are rejected.
+// (https://github.com/llvm/llvm-project/issues/187942)
+
+module attributes {gpu.container_module} {
+ gpu.module @km {
+ gpu.func @k() kernel {
+ gpu.return
+ }
+ }
+
+ func.func @cross_module_nested_ref(%sz: index) {
+ gpu.launch blocks(%bx, %by, %bz) in (%gx = %sz, %gy = %sz, %gz = %sz)
+ threads(%tx, %ty, %tz) in (%bxs = %sz, %bys = %sz, %bzs = %sz) {
+ // expected-error @below {{nested symbol reference '@km::@k' cannot be resolved inside the outlined kernel module}}
+ gpu.launch_func @km::@k
+ blocks in (%sz, %sz, %sz)
+ threads in (%sz, %sz, %sz)
+ gpu.terminator
+ }
+ return
+ }
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/188994
More information about the Mlir-commits
mailing list