[Mlir-commits] [mlir] 632c5a3 - [MLIR][XeGPU] Relax the slice layout check for broadcast operand in subgroup distribution (#181935)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Feb 17 23:00:11 PST 2026
Author: Jianhui Li
Date: 2026-02-17T23:00:06-08:00
New Revision: 632c5a3738b9e9344b8d2f3d66f8aba5ff122f3c
URL: https://github.com/llvm/llvm-project/commit/632c5a3738b9e9344b8d2f3d66f8aba5ff122f3c
DIFF: https://github.com/llvm/llvm-project/commit/632c5a3738b9e9344b8d2f3d66f8aba5ff122f3c.diff
LOG: [MLIR][XeGPU] Relax the slice layout check for broadcast operand in subgroup distribution (#181935)
This PR relaxes the operand layout check for the broadcast op in subgroup
distribution. Instead of failing the pattern match, it issues a warning
and proceeds with the distribution. The layout could be a non-slice layout but
still support valid subgroup distribution.
Added:
Modified:
mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index b8c4a309b8eb2..99c2da386fab6 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -1511,9 +1511,8 @@ struct VectorBroadcastDistribution : public gpu::WarpDistributionPattern {
// Case 1: source is lower-rank than result.
bool isSliceOf = sourceLayout.isSliceOf(resultLayout);
if (!isSliceOf)
- return rewriter.notifyMatchFailure(
- warpOp,
- "Broadcast input layout must be a slice of result layout.");
+ broadcastOp.emitWarning()
+ << "Broadcast input layout must be a slice of result layout.";
}
// case 2: source and result have same rank
if (rankDiff == 0) {
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
index 645e889d40657..fb23f38b44b46 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
@@ -1038,28 +1038,39 @@ gpu.func @vector_insert_strided_slice_unsupported_offset(%laneid: index) {
gpu.return
}
-// CHECK-LABEL: gpu.func @vector_broadcast_1d_to_2d_broadcast_within_lane
+// CHECK-LABEL: gpu.func @vector_broadcast_1d_to_2d_to_3d_broadcast_within_lane
// CHECK-SAME: (%[[ARG0:.*]]: index) {
-// CHECK: %[[R:.*]]:2 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<16x1xf16>, vector<1xf16>)
-// CHECK: %[[DEF:.*]] = "some_def"()
-// CHECK: %[[BCAST_INNER:.*]] = vector.broadcast %[[DEF]]
-// CHECK: gpu.yield %[[BCAST_INNER]], %[[DEF]]
-// CHECK: %[[BCAST:.*]] = vector.broadcast %[[R]]#1 : vector<1xf16> to vector<16x1xf16>
-// CHECK: "some_use"(%[[BCAST]])
-gpu.func @vector_broadcast_1d_to_2d_broadcast_within_lane(%laneid: index) {
-
- %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<16x1xf16>) {
+// CHECK: %[[R:.*]]:4 = gpu.warp_execute_on_lane_0(%[[ARG0]])[16] -> (vector<16x1xf16>, vector<1x16x1xf16>, vector<1xf16>, vector<16x1xf16>)
+// CHECK: %[[DEF0:.*]] = "some_def"() : () -> vector<16xf16>
+// CHECK: %[[DEF1:.*]] = "some_def"() : () -> vector<16x16xf16>
+// CHECK: %[[BCAST_INNER:.*]] = vector.broadcast %[[DEF0]]
+// CHECK: %[[CAST_INNER:.*]] = vector.shape_cast %[[DEF1]] : vector<16x16xf16> to vector<1x16x16xf16>
+// CHECK: gpu.yield %[[BCAST_INNER]], %[[CAST_INNER]], %[[DEF0]], %[[DEF1]]
+// CHECK: %[[CAST:.*]] = vector.shape_cast %[[R]]#3 : vector<16x1xf16> to vector<1x16x1xf16>
+// CHECK: %[[BCAST:.*]] = vector.broadcast %[[R]]#2 : vector<1xf16> to vector<16x1xf16>
+// CHECK: "some_use"(%[[BCAST]]) : (vector<16x1xf16>) -> ()
+// CHECK: "some_use"(%[[CAST]]) : (vector<1x16x1xf16>) -> ()
+gpu.func @vector_broadcast_1d_to_2d_to_3d_broadcast_within_lane(%laneid: index) {
+
+ %r:2 = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<16x1xf16>, vector<1x16x1xf16>) {
%1 = "some_def"() : () -> vector<16xf16>
+ %3 = "some_def"() : () -> vector<16x16xf16>
%2 = vector.broadcast %1 {
layout_operand_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>,
layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
} : vector<16xf16> to vector<16x16xf16>
- gpu.yield %2 : vector<16x16xf16>
+ %4 = vector.broadcast %3 {
+ layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+ layout_result_0 = #xegpu.layout<lane_layout = [1, 1, 16], lane_data = [1, 1, 1]>
+ } : vector<16x16xf16> to vector<1x16x16xf16>
+
+ gpu.yield %2, %4 : vector<16x16xf16>, vector<1x16x16xf16>
}
- "some_use"(%r) : (vector<16x1xf16>) -> ()
+ "some_use"(%r#0) : (vector<16x1xf16>) -> ()
+ "some_use"(%r#1) : (vector<1x16x1xf16>) -> ()
gpu.return
}
More information about the Mlir-commits
mailing list