[Mlir-commits] [mlir] b5ef9e2 - [MLIR][XeGPU] Avoid crashing on `gpu.func` missing `gpu.return` (#186330)

Wed Mar 18 08:25:19 PDT 2026

Author: Akimasa Watanuki
Date: 2026-03-19T00:25:13+09:00
New Revision: b5ef9e29c5fe12590e1f966c4af35f98185a175f

URL: https://github.com/llvm/llvm-project/commit/b5ef9e29c5fe12590e1f966c4af35f98185a175f
DIFF: https://github.com/llvm/llvm-project/commit/b5ef9e29c5fe12590e1f966c4af35f98185a175f.diff

LOG: [MLIR][XeGPU] Avoid crashing on `gpu.func` missing `gpu.return` (#186330)

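Skip malformed `gpu.func` operations in `MoveFuncBodyToWarpOp`.
The pattern previously applied `cast<gpu::ReturnOp>` to the terminator of
the last block, which asserts when that terminator is not a `gpu.return`.
It now checks the terminator up front and reports a match failure instead.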

Add a regression test covering a `gpu.func` missing a return terminator,
and switch the existing unknown-op test to `test.unknown` so the file no
longer needs `--allow-unregistered-dialect`.
While touching the test file, trim a few FileCheck lines that were
asserting printer details instead of the transform behavior.

Fix https://github.com/llvm/llvm-project/issues/186037

Added: 
    

Modified: 
    mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
    mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index 7e8ad733fa0ee..fdfe48c6cdc12 100644

--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -162,6 +162,11 @@ struct MoveFuncBodyToWarpOp : public OpRewritePattern<gpu::GPUFuncOp> {
           return isa<gpu::WarpExecuteOnLane0Op>(op);
         }))
       return failure();
+    gpu::ReturnOp origReturnOp = dyn_cast_if_present<gpu::ReturnOp>(
+        gpuFuncOp.getBlocks().back().getTerminator());
+    if (!origReturnOp)
+      return rewriter.notifyMatchFailure(
+          gpuFuncOp, "expected gpu.func terminator to be gpu.return");
     // Create a new function with the same signature and same attributes.
     SmallVector<Type> workgroupAttributionsTypes =
         llvm::map_to_vector(gpuFuncOp.getWorkgroupAttributions(),
@@ -187,12 +192,10 @@ struct MoveFuncBodyToWarpOp : public OpRewritePattern<gpu::GPUFuncOp> {
         newGpuFunc.getArgumentTypes());
     Block &warpBodyBlock = warpOp.getBodyRegion().front();
     // Replace the ReturnOp of the original gpu function with a YieldOp.
-    auto origRetunOp =
-        cast<gpu::ReturnOp>(gpuFuncOp.getBlocks().back().getTerminator());
-    rewriter.setInsertionPointAfter(origRetunOp);
-    gpu::YieldOp::create(rewriter, origRetunOp.getLoc(),
-                         origRetunOp.getOperands());
-    rewriter.eraseOp(origRetunOp);
+    rewriter.setInsertionPointAfter(origReturnOp);
+    gpu::YieldOp::create(rewriter, origReturnOp.getLoc(),
+                         origReturnOp.getOperands());
+    rewriter.eraseOp(origReturnOp);
     // Move the original function body to the WarpExecuteOnLane0Op body.
     rewriter.inlineRegionBefore(gpuFuncOp.getBody(), warpOp.getBodyRegion(),
                                 warpOp.getBodyRegion().begin());

diff  --git a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
index 2780212d2917f..57a26fa4d7a7a 100644
--- a/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
+++ b/mlir/test/Dialect/XeGPU/move-gpu-func-to-warp-op.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-move-func-to-warp-op -split-input-file --allow-unregistered-dialect %s | FileCheck %s
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-move-func-to-warp-op -split-input-file %s | FileCheck %s
 
 gpu.module @test {
 gpu.func @empty()  {
@@ -6,9 +6,8 @@ gpu.func @empty()  {
 }
 }
 
-// CHECK-LABEL: gpu.func @empty() {
+// CHECK-LABEL: gpu.func @empty()
 // CHECK-NEXT:      gpu.return
-// CHECK-NEXT:  }
 
 // -----
 gpu.module @test {
@@ -27,10 +26,10 @@ gpu.func @gemm(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<
 
 // CHECK-LABEL: gpu.func @gemm(
 // CHECK:         %[[ARG0:[a-zA-Z0-9]+]]: memref<8x16xf16>, %[[ARG1:[a-zA-Z0-9]+]]: memref<16x16xf16>,
-// CHECK-SAME:    %[[ARG2:[a-zA-Z0-9]+]]: memref<8x16xf32>) {
+// CHECK-SAME:    %[[ARG2:[a-zA-Z0-9]+]]: memref<8x16xf32>)
 // CHECK:         %[[LANEID:.*]] = gpu.lane_id
 // CHECK-NEXT:    gpu.warp_execute_on_lane_0(%[[LANEID]])[16]
-// CHECK-SAME:      args(%[[ARG0]], %[[ARG1]], %[[ARG2]] : memref<8x16xf16>, memref<16x16xf16>, memref<8x16xf32>) {
+// CHECK-SAME:      args(%[[ARG0]], %[[ARG1]], %[[ARG2]] : memref<8x16xf16>, memref<16x16xf16>, memref<8x16xf32>)
 // CHECK:           ^bb0(%[[ARG3:[a-zA-Z0-9]+]]: memref<8x16xf16>, %[[ARG4:[a-zA-Z0-9]+]]: memref<16x16xf16>,
 // CHECK-SAME:      %[[ARG5:[a-zA-Z0-9]+]]: memref<8x16xf32>):
 // CHECK-NEXT:      %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -40,24 +39,34 @@ gpu.func @gemm(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<
 // CHECK-NEXT:      %[[T5:.*]] = xegpu.dpas %[[T3]], %[[T4]] : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32>
 // CHECK-NEXT:      %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG5]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
 // CHECK-NEXT:      xegpu.store_nd %[[T5]], %[[T6]][%{{.*}}]  : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32>
-// CHECK-NEXT:    }
-// CHECK-NEXT:    gpu.return
+// CHECK:         gpu.return
 
 // -----
 gpu.module @test {
 gpu.func @already_in_warp_op() {
   %laneid = gpu.lane_id
   gpu.warp_execute_on_lane_0(%laneid)[16] {
-    "some_op"() : () -> ()
+    "test.unknown"() : () -> ()
     gpu.yield
   }
   gpu.return
 }
 }
 
-// CHECK-LABEL: gpu.func @already_in_warp_op() {
+// CHECK-LABEL: gpu.func @already_in_warp_op()
 // CHECK:         %[[LANEID:.*]] = gpu.lane_id
-// CHECK:         gpu.warp_execute_on_lane_0(%[[LANEID]])[16] {
-// CHECK:           "some_op"() : () -> ()
-// CHECK:         }
+// CHECK:         gpu.warp_execute_on_lane_0(%[[LANEID]])[16]
+// CHECK:           "test.unknown"() : () -> ()
 // CHECK:         gpu.return
+
+// -----
+gpu.module @test {
+"gpu.func"() ({
+^bb0:
+  "test.unknown"() : () -> ()
+}) {function_type = () -> (), kernel, sym_name = "missing_return_terminator"} : () -> ()
+}
+
+// Regression test: MoveFuncBodyToWarpOp must not rewrite a gpu.func whose body lacks a gpu.return.
+// CHECK-LABEL: gpu.func @missing_return_terminator
+// CHECK-NEXT:    "test.unknown"() : () -> ()