[Mlir-commits] [mlir] fced809 - [MLIR][XeGPU] Add propagation support for convert_layout op (#191598)

Wed Apr 15 11:31:04 PDT 2026

Author: Jianhui Li
Date: 2026-04-15T11:31:00-07:00
New Revision: fced80904ca5eaf4e0361ba91f594d3cc902f125

URL: https://github.com/llvm/llvm-project/commit/fced80904ca5eaf4e0361ba91f594d3cc902f125
DIFF: https://github.com/llvm/llvm-project/commit/fced80904ca5eaf4e0361ba91f594d3cc902f125.diff

LOG: [MLIR][XeGPU] Add propagation support for convert_layout op (#191598)

As title

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
    mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
    mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
    mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index e001419257d8f..f9c3c155a32d5 100644

--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1494,8 +1494,10 @@ def XeGPU_FenceOp: XeGPU_Op<"fence", []> {
   let extraClassDeclaration = extraBaseClassDeclaration;
 }
 
-def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["source", "result"]>, AnchorLayoutInterface]> {
-    let summary = "Convert the layout of the input operand";
+def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [AllTypesMatch<["source", "result"]>, 
+      MemoryEffects<[MemRead, MemWrite]>, 
+      AnchorLayoutInterface]> {    
+  let summary = "Convert the layout of the input operand";
     let description = [{
       `convert_layout` redistribute data across subgroups and/or lanes from the `input_layout` to
       the `target_layout`. Both `input_layout` and `target_layout` must correspond to the same programming

diff  --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 1748bc27b87c8..ff9ff4937c293 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -396,6 +396,10 @@ class LayoutInfoPropagation
                            ArrayRef<LayoutInfoLattice *> operands,
                            ArrayRef<const LayoutInfoLattice *> results);
 
+  void visitConvertLayoutOp(xegpu::ConvertLayoutOp convertLayout,
+                            ArrayRef<LayoutInfoLattice *> operands,
+                            ArrayRef<const LayoutInfoLattice *> results);
+
   bool hasParamsOfLayoutKind(xegpu::DistributeLayoutAttr anchorLayout);
 
 public:
@@ -483,6 +487,9 @@ LogicalResult LayoutInfoPropagation::visitOperation(
       .Case([&](xegpu::StoreMatrixOp storeMatrixOp) {
         visitStoreMatrixOp(storeMatrixOp, operands, results);
       })
+      .Case([&](xegpu::ConvertLayoutOp convertLayoutOp) {
+        visitConvertLayoutOp(convertLayoutOp, operands, results);
+      })
       // All other ops.
       .Default([&](Operation *op) {
         for (const LayoutInfoLattice *resultInfo : results) {
@@ -936,6 +943,17 @@ void LayoutInfoPropagation::visitLoadNdOp(
   propagateIfChanged(operands[0], operands[0]->meet(loadLayout));
 }
 
+/// Propagate the layout of the value to the tensor descriptor operand in
+/// ConvertLayoutOp.
+void LayoutInfoPropagation::visitConvertLayoutOp(
+    xegpu::ConvertLayoutOp convert, ArrayRef<LayoutInfoLattice *> operands,
+    ArrayRef<const LayoutInfoLattice *> results) {
+  xegpu::DistributeLayoutAttr anchorLayout = convert.getInputLayoutAttr();
+  LayoutInfo convertLayout(anchorLayout);
+  // Propagate the new layout to the tensor descriptor operand.
+  propagateIfChanged(operands[0], operands[0]->meet(convertLayout));
+}
+
 /// For vector::TransposeOp, the layout of the result is transposed and
 /// propagated to the operand.
 void LayoutInfoPropagation::visitTransposeOp(
@@ -1619,10 +1637,7 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
   // Helper to convert LayoutInfo to xegpu::LayoutAttr.
   auto getXeGPULayoutForValue = [&](Value val) -> xegpu::DistributeLayoutAttr {
     LayoutInfo layout = analysis.getLayoutInfo(val);
-    if (!layout.isAssigned())
-      return {};
     if (auto opResult = dyn_cast<OpResult>(val)) {
-
       Operation *defOp = opResult.getDefiningOp();
       if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
         auto anchorLayout = anchorOp.getAnchorLayout();
@@ -1634,6 +1649,8 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
       if (requiredResLayoutAttr != nullptr)
         return requiredResLayoutAttr;
     }
+    if (!layout.isAssigned())
+      return {};
     xegpu::DistributeLayoutAttr layoutAttr =
         cast<xegpu::DistributeLayoutAttr>(layout.get());
     if (layout.isSliceLayout())
@@ -1677,6 +1694,16 @@ LogicalResult xegpu::resolveLayoutConflicts(Operation *target) {
 }
 
 void XeGPUPropagateLayoutPass::runOnOperation() {
+  // Clean up temporary layout attributes
+  getOperation()->walk([](Operation *op) {
+    SmallVector<StringAttr> attrsToRemove;
+    for (auto namedAttr : op->getDiscardableAttrs()) {
+      if (isa<xegpu::DistributeLayoutAttr>(namedAttr.getValue()))
+        attrsToRemove.push_back(namedAttr.getName());
+    }
+    for (auto attrName : attrsToRemove)
+      op->removeDiscardableAttr(attrName);
+  });
   xegpu::LayoutKind layoutKind;
   if (this->layoutKind == "lane") {
     layoutKind = xegpu::LayoutKind::Lane;

diff  --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 831d1e05967f8..ef9c4be461b02 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -332,3 +332,21 @@ gpu.module @xevm_module{
     gpu.return
   }
 }
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: gpu.func @convert_layout
+// CHECK-SAME: %[[ARG0:.*]]: !xegpu.mem_desc<32x128xf32>
+  gpu.func @convert_layout(%arg0: !xegpu.mem_desc<32x128xf32>) {
+    // CHECK: %[[C0:.*]] = arith.constant 0 : index
+    %c0 = arith.constant 0 : index
+    // CHECK: %[[V0:.*]] = xegpu.load_matrix %[[ARG0]][%[[C0]], %[[C0]]]
+    // CHECK-SAME: layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>
+    %1 = xegpu.load_matrix %arg0[%c0, %c0] : !xegpu.mem_desc<32x128xf32>, index, index -> vector<32x128xf32>
+    %2 = xegpu.convert_layout %1
+       <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+        target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+       : vector<32x128xf32>
+    gpu.return
+  }
+}

diff  --git a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
index c73a9990542de..a33bd015180a6 100644
--- a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
+++ b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
@@ -252,4 +252,29 @@ func.func @conflict_postop() -> vector<16x16xf16> {
   return %1 : vector<16x16xf16>
 }
 
+// CHECK-LABEL: func.func @convert_layout
+// CHECK: %[[V0:.*]] = xegpu.convert_layout %[[CST:.*]] <{input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>, target_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>}> : vector<32x128xf32>
+// CHECK: %[[V1:.*]] = xegpu.convert_layout %[[V0]]  <{input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [4, 32]>, target_layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}> : vector<32x128xf32>
+func.func @convert_layout() {
+  %src0 = arith.constant
+    {layout_result_0 = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}
+      dense<0.000000e+00>
+     : vector<32x128xf32>
+  %src0_cvt = xegpu.convert_layout %src0
+    <{input_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>,
+     target_layout = #xegpu.layout<sg_layout=[8, 4], sg_data=[4, 32]>}>
+    : vector<32x128xf32>
+  %src1 = arith.constant
+    {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}
+    dense<1.000000e+00>
+    : vector<32x128xf32>
+  %dest = arith.addf %src0_cvt, %src1
+    {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [8, 16]>}
+    : vector<32x128xf32>
+  %desc_cvt = xegpu.convert_layout %dest
+     <{input_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>,
+      target_layout = #xegpu.layout<sg_layout=[4, 8], sg_data=[8, 16]>}>
+     : vector<32x128xf32>
+  return
+}
 }