[Mlir-commits] [mlir] [MLIR][XeGPU] Avoid crashing on malformed `gpu.func` in `MoveFuncBodyToWarpOp` (PR #186330)
Akimasa Watanuki
llvmlistbot at llvm.org
Tue Mar 17 04:46:03 PDT 2026
https://github.com/Men-cotton updated https://github.com/llvm/llvm-project/pull/186330
From 296cfa9a8fc1229f5bffea612211233fcc431093 Mon Sep 17 00:00:00 2001
From: mencotton <mencotton0410 at gmail.com>
Date: Fri, 13 Mar 2026 16:24:25 +0900
Subject: [PATCH 1/2] [MLIR][XeGPU] Avoid crashing on malformed `gpu.func` in
`MoveFuncBodyToWarpOp`
Skip malformed `gpu.func` operations in `MoveFuncBodyToWarpOp`.
This prevents functions without a `gpu.return` terminator from triggering an assertion under `--allow-unregistered-dialect`.
Add a regression test covering a `gpu.func` missing a return terminator.
While touching the test file, trim a few FileCheck lines that were asserting printer details instead of the transform behavior.
---
.../Transforms/XeGPUSubgroupDistribute.cpp | 15 ++++++-----
.../XeGPU/move-gpu-func-to-warp-op.mlir | 27 ++++++++++++-------
2 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index 38bc95d39c2c6..ec6d34912c741 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -162,6 +162,11 @@ struct MoveFuncBodyToWarpOp : public OpRewritePattern<gpu::GPUFuncOp> {
return isa<gpu::WarpExecuteOnLane0Op>(op);
}))
return failure();
+ gpu::ReturnOp origReturnOp = dyn_cast_if_present<gpu::ReturnOp>(
+ gpuFuncOp.getBlocks().back().getTerminator());
+ if (!origReturnOp)
+ return rewriter.notifyMatchFailure(
+ gpuFuncOp, "expected gpu.func terminator to be gpu.return");
// Create a new function with the same signature and same attributes.
SmallVector<Type> workgroupAttributionsTypes =
llvm::map_to_vector(gpuFuncOp.getWorkgroupAttributions(),
@@ -187,12 +192,10 @@ struct MoveFuncBodyToWarpOp : public OpRewritePattern<gpu::GPUFuncOp> {
newGpuFunc.getArgumentTypes());
Block &warpBodyBlock = warpOp.getBodyRegion().front();
// Replace the ReturnOp of the original gpu function with a YieldOp.
- auto origRetunOp =
- cast<gpu::ReturnOp>(gpuFuncOp.getBlocks().back().getTerminator());
- rewriter.setInsertionPointAfter(origRetunOp);
- gpu::YieldOp::create(rewriter, origRetunOp.getLoc(),
- origRetunOp.getOperands());
- rewriter.eraseOp(origRetunOp);
+ rewriter.setInsertionPointAfter(origReturnOp);
+ gpu::YieldOp::create(rewriter, origReturnOp.getLoc(),
+ origReturnOp.getOperands());
+ rewriter.eraseOp(origReturnOp);
// Move the original function body to the WarpExecuteOnLane0Op body.
rewriter.inlineRegionBefore(gpuFuncOp.getBody(), warpOp.getBodyRegion(),
warpOp.getBodyRegion().begin());
diff --git a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
index 2780212d2917f..bf6f91c0a7985 100644
--- a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
+++ b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
@@ -6,9 +6,8 @@ gpu.func @empty() {
}
}
-// CHECK-LABEL: gpu.func @empty() {
+// CHECK-LABEL: gpu.func @empty()
// CHECK-NEXT: gpu.return
-// CHECK-NEXT: }
// -----
gpu.module @test {
@@ -27,10 +26,10 @@ gpu.func @gemm(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<
// CHECK-LABEL: gpu.func @gemm(
// CHECK: %[[ARG0:[a-zA-Z0-9]+]]: memref<8x16xf16>, %[[ARG1:[a-zA-Z0-9]+]]: memref<16x16xf16>,
-// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: memref<8x16xf32>) {
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: memref<8x16xf32>)
// CHECK: %[[LANEID:.*]] = gpu.lane_id
// CHECK-NEXT: gpu.warp_execute_on_lane_0(%[[LANEID]])[16]
-// CHECK-SAME: args(%[[ARG0]], %[[ARG1]], %[[ARG2]] : memref<8x16xf16>, memref<16x16xf16>, memref<8x16xf32>) {
+// CHECK-SAME: args(%[[ARG0]], %[[ARG1]], %[[ARG2]] : memref<8x16xf16>, memref<16x16xf16>, memref<8x16xf32>)
// CHECK: ^bb0(%[[ARG3:[a-zA-Z0-9]+]]: memref<8x16xf16>, %[[ARG4:[a-zA-Z0-9]+]]: memref<16x16xf16>,
// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: memref<8x16xf32>):
// CHECK-NEXT: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -40,8 +39,7 @@ gpu.func @gemm(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<
// CHECK-NEXT: %[[T5:.*]] = xegpu.dpas %[[T3]], %[[T4]] : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>
// CHECK-NEXT: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG5]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
// CHECK-NEXT: xegpu.store_nd %[[T5]], %[[T6]][%{{.*}}] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32>
-// CHECK-NEXT: }
-// CHECK-NEXT: gpu.return
+// CHECK: gpu.return
// -----
gpu.module @test {
@@ -55,9 +53,20 @@ gpu.func @already_in_warp_op() {
}
}
-// CHECK-LABEL: gpu.func @already_in_warp_op() {
+// CHECK-LABEL: gpu.func @already_in_warp_op()
// CHECK: %[[LANEID:.*]] = gpu.lane_id
-// CHECK: gpu.warp_execute_on_lane_0(%[[LANEID]])[16] {
+// CHECK: gpu.warp_execute_on_lane_0(%[[LANEID]])[16]
// CHECK: "some_op"() : () -> ()
-// CHECK: }
// CHECK: gpu.return
+
+// -----
+gpu.module @test {
+"gpu.func"() ({
+^bb0:
+ "test.unknown"() : () -> ()
+}) {function_type = () -> (), kernel, sym_name = "missing_return_terminator"} : () -> ()
+}
+
+// Regression test for MoveFuncBodyToWarpOp on malformed generic gpu.func.
+// CHECK-LABEL: gpu.func @missing_return_terminator
+// CHECK-NEXT: "test.unknown"() : () -> ()
From e40acc2a5f4c0d2ba003f8a80a55de4d555baea7 Mon Sep 17 00:00:00 2001
From: mencotton <mencotton0410 at gmail.com>
Date: Tue, 17 Mar 2026 20:38:49 +0900
Subject: [PATCH 2/2] fix: drop unnecessary `-allow-unregistered-dialect`
---
mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
index bf6f91c0a7985..57a26fa4d7a7a 100644
--- a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
+++ b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-move-func-to-warp-op -split-input-file --allow-unregistered-dialect %s | FileCheck %s
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-move-func-to-warp-op -split-input-file %s | FileCheck %s
gpu.module @test {
gpu.func @empty() {
@@ -46,7 +46,7 @@ gpu.module @test {
gpu.func @already_in_warp_op() {
%laneid = gpu.lane_id
gpu.warp_execute_on_lane_0(%laneid)[16] {
- "some_op"() : () -> ()
+ "test.unknown"() : () -> ()
gpu.yield
}
gpu.return
@@ -56,7 +56,7 @@ gpu.func @already_in_warp_op() {
// CHECK-LABEL: gpu.func @already_in_warp_op()
// CHECK: %[[LANEID:.*]] = gpu.lane_id
// CHECK: gpu.warp_execute_on_lane_0(%[[LANEID]])[16]
-// CHECK: "some_op"() : () -> ()
+// CHECK: "test.unknown"() : () -> ()
// CHECK: gpu.return
// -----
More information about the Mlir-commits
mailing list