[Mlir-commits] [mlir] [MLIR][XeGPU] Add propagation support for convert_layout op (PR #191598)
Jianhui Li
llvmlistbot at llvm.org
Fri Apr 10 21:34:59 PDT 2026
https://github.com/Jianhui-Li updated https://github.com/llvm/llvm-project/pull/191598
>From debd1fd37d5128b61ac327c073f0316c2713408b Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 11 Apr 2026 00:24:10 +0000
Subject: [PATCH 1/3] propagate layout associated with convert layout op
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 6 ++--
.../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 33 +++++++++++++++++--
.../XeGPU/propagate-layout-subgroup.mlir | 20 +++++++++++
3 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index e001419257d8f..f9c3c155a32d5 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1494,8 +1494,10 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
let extraClassDeclaration = extraBaseClassDeclaration;
}
-def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["source", "result"]>, AnchorLayoutInterface]> {
- let summary = "Convert the layout of the input operand";
+def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [AllTypesMatch<["source", "result"]>,
+ MemoryEffects<[MemRead, MemWrite]>,
+ AnchorLayoutInterface]> {
+ let summary = "Convert the layout of the input operand";
let description = [{
`convert_layout` redistribute data across subgroups and/or lanes from the `input_layout` to
the `target_layout`. Both `input_layout` and `target_layout` must correspond to the same programming
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 4c30dacae8850..e606fdd843e2b 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -396,6 +396,10 @@ class LayoutInfoPropagation
ArrayRef<LayoutInfoLattice *> operands,
ArrayRef<const LayoutInfoLattice *> results);
+ void visitConvertLayoutOp(xegpu::ConvertLayoutOp convertLayout,
+ ArrayRef<LayoutInfoLattice *> operands,
+ ArrayRef<const LayoutInfoLattice *> results);
+
bool hasParamsOfLayoutKind(xegpu::DistributeLayoutAttr anchorLayout);
public:
@@ -483,6 +487,9 @@ LogicalResult LayoutInfoPropagation::visitOperation(
.Case([&](xegpu::StoreMatrixOp storeMatrixOp) {
visitStoreMatrixOp(storeMatrixOp, operands, results);
})
+ .Case([&](xegpu::ConvertLayoutOp convertLayoutOp) {
+ visitConvertLayoutOp(convertLayoutOp, operands, results);
+ })
// All other ops.
.Default([&](Operation *op) {
for (const LayoutInfoLattice *resultInfo : results) {
@@ -936,6 +943,17 @@ void LayoutInfoPropagation::visitLoadNdOp(
propagateIfChanged(operands[0], operands[0]->meet(loadLayout));
}
+/// Propagate the input layout of the ConvertLayoutOp to its source operand
+/// (operand 0).
+void LayoutInfoPropagation::visitConvertLayoutOp(
+ xegpu::ConvertLayoutOp convert, ArrayRef<LayoutInfoLattice *> operands,
+ ArrayRef<const LayoutInfoLattice *> results) {
+ xegpu::DistributeLayoutAttr anchorLayout = convert.getInputLayoutAttr();
+ LayoutInfo convertLayout(anchorLayout);
+ // Propagate the op's input layout to the source operand.
+ propagateIfChanged(operands[0], operands[0]->meet(convertLayout));
+}
+
/// For vector::TransposeOp, the layout of the result is transposed and
/// propagated to the operand.
void LayoutInfoPropagation::visitTransposeOp(
@@ -1645,10 +1663,7 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
// Helper to convert LayoutInfo to xegpu::LayoutAttr.
auto getXeGPULayoutForValue = [&](Value val) -> xegpu::DistributeLayoutAttr {
LayoutInfo layout = analysis.getLayoutInfo(val);
- if (!layout.isAssigned())
- return {};
if (auto opResult = dyn_cast<OpResult>(val)) {
-
Operation *defOp = opResult.getDefiningOp();
if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
auto anchorLayout = anchorOp.getAnchorLayout();
@@ -1660,6 +1675,8 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
if (requiredResLayoutAttr != nullptr)
return requiredResLayoutAttr;
}
+ if (!layout.isAssigned())
+ return {};
xegpu::DistributeLayoutAttr layoutAttr =
cast<xegpu::DistributeLayoutAttr>(layout.get());
if (layout.isSliceLayout())
@@ -1703,6 +1720,16 @@ LogicalResult xegpu::resolveLayoutConflicts(Operation *target) {
}
void XeGPUPropagateLayoutPass::runOnOperation() {
+ // Clean up temporary layout attributes
+ getOperation()->walk([](Operation *op) {
+ SmallVector<StringAttr> attrsToRemove;
+ for (auto namedAttr : op->getDiscardableAttrs()) {
+ if (isa<xegpu::DistributeLayoutAttr>(namedAttr.getValue()))
+ attrsToRemove.push_back(namedAttr.getName());
+ }
+ for (auto attrName : attrsToRemove)
+ op->removeDiscardableAttr(attrName);
+ });
xegpu::LayoutKind layoutKind;
if (this->layoutKind == "lane") {
layoutKind = xegpu::LayoutKind::Lane;
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 831d1e05967f8..8b9a84ee5e21f 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,3 +332,23 @@ gpu.module @xevm_module{
gpu.return
}
}
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: convert_layout
+ gpu.func @convert_layout() {
+ %src0 = arith.constant dense<0.000000e+00> : vector<32x128xf32>
+ %src0_cvt = xegpu.convert_layout %src0
+ <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
+ target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
+ : vector<32x128xf32>
+ %src1 = arith.constant dense<1.000000e+00> : vector<32x128xf32>
+ %dest = arith.addf %src0_cvt, %src1 : vector<32x128xf32>
+ %desc_cvt = xegpu.convert_layout %dest
+ <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+ target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+ : vector<32x128xf32>
+ gpu.return
+ }
+}
+
>From 83adca3929312332d8e8c10a62cb7f47061a90ac Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 11 Apr 2026 04:19:41 +0000
Subject: [PATCH 2/3] add tests
---
.../XeGPU/propagate-layout-subgroup.mlir | 20 ---------------
.../XeGPU/resolve-layout-conflicts.mlir | 25 +++++++++++++++++++
2 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 8b9a84ee5e21f..831d1e05967f8 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,23 +332,3 @@ gpu.module @xevm_module{
gpu.return
}
}
-
-// -----
-gpu.module @test {
-// CHECK-LABEL: convert_layout
- gpu.func @convert_layout() {
- %src0 = arith.constant dense<0.000000e+00> : vector<32x128xf32>
- %src0_cvt = xegpu.convert_layout %src0
- <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
- target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
- : vector<32x128xf32>
- %src1 = arith.constant dense<1.000000e+00> : vector<32x128xf32>
- %dest = arith.addf %src0_cvt, %src1 : vector<32x128xf32>
- %desc_cvt = xegpu.convert_layout %dest
- <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
- target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
- : vector<32x128xf32>
- gpu.return
- }
-}
-
diff --git a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
index c73a9990542de..a33bd015180a6 100644
--- a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
+++ b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
@@ -252,4 +252,29 @@ func.func @conflict_postop() -> vector<16x16xf16> {
return %1 : vector<16x16xf16>
}
+// CHECK-LABEL: func.func @convert_layout
+// CHECK: %[[V0:.*]] = xegpu.convert_layout %[[CST:.*]] <{input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>, target_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>}> : vector<32x128xf32>
+// CHECK: %[[V1:.*]] = xegpu.convert_layout %[[V0]] <{input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>, target_layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}> : vector<32x128xf32>
+func.func @convert_layout() {
+ %src0 = arith.constant
+ {layout_result_0 = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}
+ dense<0.000000e+00>
+ : vector<32x128xf32>
+ %src0_cvt = xegpu.convert_layout %src0
+ <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
+ target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
+ : vector<32x128xf32>
+ %src1 = arith.constant
+ {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}
+ dense<1.000000e+00>
+ : vector<32x128xf32>
+ %dest = arith.addf %src0_cvt, %src1
+ {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}
+ : vector<32x128xf32>
+ %desc_cvt = xegpu.convert_layout %dest
+ <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+ target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+ : vector<32x128xf32>
+ return
+}
}
>From 8d5a22581b136c020fdba36c7913e844c6af1ea7 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 11 Apr 2026 04:34:46 +0000
Subject: [PATCH 3/3] adding test
---
.../XeGPU/propagate-layout-subgroup.mlir | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 831d1e05967f8..ef9c4be461b02 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,3 +332,21 @@ gpu.module @xevm_module{
gpu.return
}
}
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: gpu.func @convert_layout
+// CHECK-SAME: %[[ARG0:.*]]: !xegpu.mem_desc<32x128xf32>
+ gpu.func @convert_layout(%arg0: !xegpu.mem_desc<32x128xf32>) {
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ %c0 = arith.constant 0 : index
+ // CHECK: %[[V0:.*]] = xegpu.load_matrix %[[ARG0]][%[[C0]], %[[C0]]]
+ // CHECK-SAME: layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>
+ %1 = xegpu.load_matrix %arg0[%c0, %c0] : !xegpu.mem_desc<32x128xf32>, index, index -> vector<32x128xf32>
+ %2 = xegpu.convert_layout %1
+ <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+ target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+ : vector<32x128xf32>
+ gpu.return
+ }
+}
More information about the Mlir-commits
mailing list