[Mlir-commits] [mlir] [MLIR][XeGPU] Add propagation support for convert_layout op (PR #191598)

Fri Apr 10 21:34:59 PDT 2026

https://github.com/Jianhui-Li updated https://github.com/llvm/llvm-project/pull/191598

>From debd1fd37d5128b61ac327c073f0316c2713408b Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 11 Apr 2026 00:24:10 +0000
Subject: [PATCH 1/3] propagate layout associated with convert layout op

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td |  6 ++--
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 33 +++++++++++++++++--
 .../XeGPU/propagate-layout-subgroup.mlir      | 20 +++++++++++
 3 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index e001419257d8f..f9c3c155a32d5 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1494,8 +1494,10 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
   let extraClassDeclaration = extraBaseClassDeclaration;
 }
 
-def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["source", "result"]>, AnchorLayoutInterface]> {
-    let summary = "Convert the layout of the input operand";
+def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [AllTypesMatch<["source", "result"]>, 
+      MemoryEffects<[MemRead, MemWrite]>, 
+      AnchorLayoutInterface]> {    
+  let summary = "Convert the layout of the input operand";
     let description = [{
       `convert_layout` redistribute data across subgroups and/or lanes from the `input_layout` to
       the `target_layout`. Both `input_layout` and `target_layout` must correspond to the same programming
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 4c30dacae8850..e606fdd843e2b 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -396,6 +396,10 @@ class LayoutInfoPropagation
                            ArrayRef<LayoutInfoLattice *> operands,
                            ArrayRef<const LayoutInfoLattice *> results);
 
+  void visitConvertLayoutOp(xegpu::ConvertLayoutOp convertLayout,
+                            ArrayRef<LayoutInfoLattice *> operands,
+                            ArrayRef<const LayoutInfoLattice *> results);
+
   bool hasParamsOfLayoutKind(xegpu::DistributeLayoutAttr anchorLayout);
 
 public:
@@ -483,6 +487,9 @@ LogicalResult LayoutInfoPropagation::visitOperation(
       .Case([&](xegpu::StoreMatrixOp storeMatrixOp) {
         visitStoreMatrixOp(storeMatrixOp, operands, results);
       })
+      .Case([&](xegpu::ConvertLayoutOp convertLayoutOp) {
+        visitConvertLayoutOp(convertLayoutOp, operands, results);
+      })
       // All other ops.
       .Default([&](Operation *op) {
         for (const LayoutInfoLattice *resultInfo : results) {
@@ -936,6 +943,17 @@ void LayoutInfoPropagation::visitLoadNdOp(
   propagateIfChanged(operands[0], operands[0]->meet(loadLayout));
 }
 
+/// Propagate the layout of the value to the tensor descriptor operand in
+/// ConvertLayoutOp.
+void LayoutInfoPropagation::visitConvertLayoutOp(
+    xegpu::ConvertLayoutOp convert, ArrayRef<LayoutInfoLattice *> operands,
+    ArrayRef<const LayoutInfoLattice *> results) {
+  xegpu::DistributeLayoutAttr anchorLayout = convert.getInputLayoutAttr();
+  LayoutInfo convertLayout(anchorLayout);
+  // Propagate the new layout to the tensor descriptor operand.
+  propagateIfChanged(operands[0], operands[0]->meet(convertLayout));
+}
+
 /// For vector::TransposeOp, the layout of the result is transposed and
 /// propagated to the operand.
 void LayoutInfoPropagation::visitTransposeOp(
@@ -1645,10 +1663,7 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
   // Helper to convert LayoutInfo to xegpu::LayoutAttr.
   auto getXeGPULayoutForValue = [&](Value val) -> xegpu::DistributeLayoutAttr {
     LayoutInfo layout = analysis.getLayoutInfo(val);
-    if (!layout.isAssigned())
-      return {};
     if (auto opResult = dyn_cast<OpResult>(val)) {
-
       Operation *defOp = opResult.getDefiningOp();
       if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
         auto anchorLayout = anchorOp.getAnchorLayout();
@@ -1660,6 +1675,8 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
       if (requiredResLayoutAttr != nullptr)
         return requiredResLayoutAttr;
     }
+    if (!layout.isAssigned())
+      return {};
     xegpu::DistributeLayoutAttr layoutAttr =
         cast<xegpu::DistributeLayoutAttr>(layout.get());
     if (layout.isSliceLayout())
@@ -1703,6 +1720,16 @@ LogicalResult xegpu::resolveLayoutConflicts(Operation *target) {
 }
 
 void XeGPUPropagateLayoutPass::runOnOperation() {
+  // Clean up temporary layout attributes
+  getOperation()->walk([](Operation *op) {
+    SmallVector<StringAttr> attrsToRemove;
+    for (auto namedAttr : op->getDiscardableAttrs()) {
+      if (isa<xegpu::DistributeLayoutAttr>(namedAttr.getValue()))
+        attrsToRemove.push_back(namedAttr.getName());
+    }
+    for (auto attrName : attrsToRemove)
+      op->removeDiscardableAttr(attrName);
+  });
   xegpu::LayoutKind layoutKind;
   if (this->layoutKind == "lane") {
     layoutKind = xegpu::LayoutKind::Lane;
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 831d1e05967f8..8b9a84ee5e21f 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,3 +332,23 @@ gpu.module @xevm_module{
     gpu.return
   }
 }
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: convert_layout
+  gpu.func @convert_layout() {
+    %src0 = arith.constant dense<0.000000e+00> : vector<32x128xf32>
+    %src0_cvt = xegpu.convert_layout %src0
+      <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
+       target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
+      : vector<32x128xf32>
+    %src1 = arith.constant dense<1.000000e+00> : vector<32x128xf32>
+    %dest = arith.addf %src0_cvt, %src1 : vector<32x128xf32>
+    %desc_cvt = xegpu.convert_layout %dest 
+      <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>, 
+      target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}> 
+      : vector<32x128xf32>
+    gpu.return
+  }
+}
+

>From 83adca3929312332d8e8c10a62cb7f47061a90ac Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 11 Apr 2026 04:19:41 +0000
Subject: [PATCH 2/3] add tests

---
 .../XeGPU/propagate-layout-subgroup.mlir      | 20 ---------------
 .../XeGPU/resolve-layout-conflicts.mlir       | 25 +++++++++++++++++++
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 8b9a84ee5e21f..831d1e05967f8 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,23 +332,3 @@ gpu.module @xevm_module{
     gpu.return
   }
 }
-
-// -----
-gpu.module @test {
-// CHECK-LABEL: convert_layout
-  gpu.func @convert_layout() {
-    %src0 = arith.constant dense<0.000000e+00> : vector<32x128xf32>
-    %src0_cvt = xegpu.convert_layout %src0
-      <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
-       target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
-      : vector<32x128xf32>
-    %src1 = arith.constant dense<1.000000e+00> : vector<32x128xf32>
-    %dest = arith.addf %src0_cvt, %src1 : vector<32x128xf32>
-    %desc_cvt = xegpu.convert_layout %dest 
-      <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>, 
-      target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}> 
-      : vector<32x128xf32>
-    gpu.return
-  }
-}
-
diff --git a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
index c73a9990542de..a33bd015180a6 100644
--- a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
+++ b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
@@ -252,4 +252,29 @@ func.func @conflict_postop() -> vector<16x16xf16> {
   return %1 : vector<16x16xf16>
 }
 
+// CHECK-LABEL: func.func @convert_layout
+// CHECK: %[[V0:.*]] = xegpu.convert_layout %[[CST:.*]] <{input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>, target_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>}> : vector<32x128xf32>
+// CHECK: %[[V1:.*]] = xegpu.convert_layout %[[V0]]  <{input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>, target_layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}> : vector<32x128xf32>
+func.func @convert_layout() {
+  %src0 = arith.constant
+    {layout_result_0 = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}
+      dense<0.000000e+00>
+     : vector<32x128xf32>
+  %src0_cvt = xegpu.convert_layout %src0
+    <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
+     target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
+    : vector<32x128xf32>
+  %src1 = arith.constant
+    {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}
+    dense<1.000000e+00>
+    : vector<32x128xf32>
+  %dest = arith.addf %src0_cvt, %src1
+    {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}
+    : vector<32x128xf32>
+  %desc_cvt = xegpu.convert_layout %dest
+     <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+      target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+     : vector<32x128xf32>
+  return
+}
 }

>From 8d5a22581b136c020fdba36c7913e844c6af1ea7 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 11 Apr 2026 04:34:46 +0000
Subject: [PATCH 3/3] adding test

---
 .../XeGPU/propagate-layout-subgroup.mlir       | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 831d1e05967f8..ef9c4be461b02 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,3 +332,21 @@ gpu.module @xevm_module{
     gpu.return
   }
 }
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: gpu.func @convert_layout
+// CHECK-SAME: %[[ARG0:.*]]: !xegpu.mem_desc<32x128xf32>
+  gpu.func @convert_layout(%arg0: !xegpu.mem_desc<32x128xf32>) {
+    // CHECK: %[[C0:.*]] = arith.constant 0 : index
+    %c0 = arith.constant 0 : index
+    // CHECK: %[[V0:.*]] = xegpu.load_matrix %[[ARG0]][%[[C0]], %[[C0]]]
+    // CHECK-SAME: layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>
+    %1 = xegpu.load_matrix %arg0[%c0, %c0] : !xegpu.mem_desc<32x128xf32>, index, index -> vector<32x128xf32>
+    %2 = xegpu.convert_layout %1
+       <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+        target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+       : vector<32x128xf32>
+    gpu.return
+  }
+}