[Mlir-commits] [mlir] [mlir][xegpu] Add support for setting `order` in `SetDescLayoutOp` and `SetOpLayoutAttrOp` transform ops. (PR #184705)

Charitha Saumya llvmlistbot at llvm.org
Fri Mar 6 14:51:31 PST 2026


https://github.com/charithaintc updated https://github.com/llvm/llvm-project/pull/184705

>From e1010fe019940630728e7c9bdeaa5c85e07e3d7e Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Wed, 4 Mar 2026 23:02:12 +0000
Subject: [PATCH 1/5] save work

---
 .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td       |  9 +++
 .../XeGPU/TransformOps/XeGPUTransformOps.td   | 10 +++-
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 22 ++++++-
 mlir/test/Dialect/XeGPU/transform-ops.mlir    | 60 +++++++++++++++++++
 4 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 6f667f4801673..fd48411e51b02 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -494,6 +494,15 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
       return 0;
     }
 
+    /// Derive a new layout with the same sg_layout, sg_data, inst_data,
+    /// lane_layout and lane_data but different order.
+    LayoutAttr cloneWithOrder(DenseI32ArrayAttr newOrder) const {
+      assert(getRank() == static_cast<int64_t>(newOrder.size())
+        && "The size of new order must match the layout rank.");
+      return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), getInstData(),
+                             getLaneLayout(), getLaneData(), newOrder);
+    }
+
     LayoutAttr dropSgLayoutAndData() const{
       // avoid every field of the attribute is nullptr, which may lead to segment fault
       if (!getInstData() && !getLaneLayout())
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index 23dabe4eb380a..bee7eea79dce9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -58,7 +58,8 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims
+                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
+                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order
                    );
 
   let results = (outs TransformHandleTypeInterface:$transformed);
@@ -67,7 +68,8 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
                    "ArrayRef<OpFoldResult>":$mixedSgLayout,
                    "ArrayRef<OpFoldResult>":$mixedSgData,
                    "ArrayRef<OpFoldResult>":$mixedInstData,
-                   "ArrayRef<int64_t>":$sliceDims
+                   "ArrayRef<int64_t>":$sliceDims,
+                   "ArrayRef<int32_t>":$order
                    )>,
   ];
 
@@ -77,6 +79,7 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
     `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
     (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
     (`slice_dims` `=` $slice_dims^)?
+    (`order` `=` $order^)?
     attr-dict `:` functional-type(operands, results)
   }];
 
@@ -128,6 +131,7 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
+                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
                    DefaultValuedAttr<UnitAttr, "false">:$result,
                    DefaultValuedAttr<UnitAttr, "false">:$operand
                    );
@@ -140,6 +144,7 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
                    "ArrayRef<OpFoldResult>":$mixedSgData,
                    "ArrayRef<OpFoldResult>":$mixedInstData,
                    "ArrayRef<int64_t>":$sliceDims,
+                   "ArrayRef<int32_t>":$order,
                    CArg<"bool", "false">:$result,
                    CArg<"bool", "false">:$operand
                    )>,
@@ -151,6 +156,7 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
     `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
     (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
     (`slice_dims` `=` $slice_dims^)?
+    (`order` `=` $order^)?
     attr-dict `:` qualified(type(operands))
   }];
 
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 7bc67da8263dc..58b103614454c 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/ADT/SmallVectorExtras.h"
 
 #include <optional>
@@ -215,7 +216,8 @@ void transform::SetDescLayoutOp::build(OpBuilder &builder,
                                        ArrayRef<OpFoldResult> mixedSgLayout,
                                        ArrayRef<OpFoldResult> mixedSgData,
                                        ArrayRef<OpFoldResult> mixedInstData,
-                                       ArrayRef<int64_t> sliceDims) {
+                                       ArrayRef<int64_t> sliceDims,
+                                       ArrayRef<int32_t> order) {
   SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
   SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
   dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
@@ -229,7 +231,8 @@ void transform::SetDescLayoutOp::build(OpBuilder &builder,
         /*static_sg_layout=*/staticSgLayout,
         /*static_sg_data=*/staticSgData,
         /*static_inst_data=*/staticInstData,
-        /*slice_dims=*/sliceDims);
+        /*slice_dims=*/sliceDims,
+        /*order=*/order);
 }
 
 DiagnosedSilenceableFailure
@@ -250,6 +253,12 @@ transform::SetDescLayoutOp::apply(transform::TransformRewriter &rewriter,
   if (!status.succeeded())
     return status;
 
+  // If order is provided, clone the layout with the provided order.
+  auto order = getOrder();
+  if (order.size() > 0)
+    layoutAttr =
+        layoutAttr.cloneWithOrder(DenseI32ArrayAttr::get(getContext(), order));
+
   xegpu::DistributeLayoutAttr layout = layoutAttr;
   auto sliceDims = getSliceDims();
   if (sliceDims.size() > 0) {
@@ -291,7 +300,7 @@ void transform::SetOpLayoutAttrOp::build(
     OpBuilder &builder, OperationState &ostate, Value target, int64_t index,
     ArrayRef<OpFoldResult> mixedSgLayout, ArrayRef<OpFoldResult> mixedSgData,
     ArrayRef<OpFoldResult> mixedInstData, ArrayRef<int64_t> sliceDims,
-    bool result, bool operand) {
+    ArrayRef<int32_t> order, bool result, bool operand) {
   SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
   SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
   dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
@@ -307,6 +316,7 @@ void transform::SetOpLayoutAttrOp::build(
         /*static_sg_data=*/staticSgData,
         /*static_inst_data=*/staticInstData,
         /*slice_dims=*/sliceDims,
+        /*order=*/order,
         /*result=*/result,
         /*operand=*/operand);
 }
@@ -342,6 +352,12 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
   if (!status.succeeded())
     return status;
 
+  // If order is provided, clone the layout with the provided order.
+  auto order = getOrder();
+  if (order.size() > 0)
+    layoutAttr =
+        layoutAttr.cloneWithOrder(DenseI32ArrayAttr::get(getContext(), order));
+
   xegpu::DistributeLayoutAttr layout = layoutAttr;
   auto sliceDims = getSliceDims();
   if (sliceDims.size() > 0) {
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 9a278cbf7b498..5b2dc47246463 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -140,6 +140,26 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: @set_desc_layout_order
+func.func @set_desc_layout_order(%arg0: memref<4096x4096xf16>) {
+  // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
+  // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false>
+  // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16], order = [1, 0]>
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_desc_layout %{{.*}}
+    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] order = [1, 0] : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // CHECK-LABEL: @set_op_layout_attr_result_default
 func.func @set_op_layout_attr_result_default(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
@@ -225,6 +245,25 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: @set_op_layout_attr_result_order
+func.func @set_op_layout_attr_result_order(%arg0: vector<256xf16>) {
+  // CHECK: = arith.extf
+  // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16], order = [0, 1]>}
+  %2 = arith.extf %arg0 : vector<256xf16> to vector<256xf32>
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
+    transform.xegpu.set_op_layout_attr %0 result index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [0, 1] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // CHECK-LABEL: @set_op_layout_attr_operand_minimal
 func.func @set_op_layout_attr_operand_minimal(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
@@ -287,6 +326,27 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
+// -----
+
+// CHECK-LABEL: @set_op_layout_attr_anchor_order
+func.func @set_op_layout_attr_anchor_order(%arg0: memref<4096x4096xf16>) {
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  // CHECK: = xegpu.load_nd %0[0, 0]
+  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16], order = [1, 0]>}>
+  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
+    transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [1, 0] : !transform.any_op
+    transform.yield
+  }
+}
+
+
 // -----
 
 // CHECK-LABEL: @set_op_layout_attr_anchor_dpas_a

>From 29dbdc18a4832e99564f753614c6053297939b4e Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Wed, 4 Mar 2026 23:42:24 +0000
Subject: [PATCH 2/5] save work

---
 mlir/python/mlir/dialects/transform/xegpu.py  |  8 ++++
 .../python/dialects/transform_xegpu_ext.py    | 45 +++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index a768ce5f4e720..03a5239dceff1 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -63,6 +63,7 @@ def __init__(
         *,
         inst_data: Optional[MixedValues] = None,
         slice_dims: Optional[MixedInt] = None,
+        order: Optional[MixedInt] = None,
         loc=None,
         ip=None,
     ):
@@ -94,6 +95,7 @@ def __init__(
             static_sg_data=static_sg_data,
             static_inst_data=static_inst_data,
             slice_dims=slice_dims,
+            order=order,
             loc=loc,
             ip=ip,
         )
@@ -106,6 +108,7 @@ def set_desc_layout(
     *,
     inst_data: Optional[MixedValues] = None,
     slice_dims: Optional[MixedInt] = None,
+    order: Optional[MixedInt] = None,
     loc=None,
     ip=None,
 ) -> OpResult:
@@ -115,6 +118,7 @@ def set_desc_layout(
         sg_data,
         inst_data=inst_data,
         slice_dims=slice_dims,
+        order=order,
         loc=loc,
         ip=ip,
     ).result
@@ -132,6 +136,7 @@ def __init__(
         *,
         inst_data: Optional[MixedValues] = None,
         slice_dims: Optional[MixedInt] = None,
+        order: Optional[MixedInt] = None,
         index: Optional[Union[int, Attribute]] = None,
         result: Optional[Union[bool, Attribute]] = None,
         operand: Optional[Union[bool, Attribute]] = None,
@@ -163,6 +168,7 @@ def __init__(
             static_sg_data=static_sg_data,
             static_inst_data=static_inst_data,
             slice_dims=slice_dims,
+            order=order,
             index=index,
             result=result,
             operand=operand,
@@ -178,6 +184,7 @@ def set_op_layout_attr(
     *,
     inst_data: Optional[MixedValues] = None,
     slice_dims: Optional[MixedInt] = None,
+    order: Optional[MixedInt] = None,
     index: Optional[Union[int, Attribute]] = None,
     result: Optional[Union[bool, Attribute]] = None,
     operand: Optional[Union[bool, Attribute]] = None,
@@ -190,6 +197,7 @@ def set_op_layout_attr(
         sg_data,
         inst_data=inst_data,
         slice_dims=slice_dims,
+        order=order,
         index=index,
         result=result,
         operand=operand,
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index e3e1313cf5f81..afb8ef9514354 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -84,6 +84,24 @@ def setDescLayoutSlice():
     # CHECK: sg_data = [32, 16]
     # CHECK: slice_dims = [0]
 
+@run
+def setDescLayoutOrder():
+    sequence = transform.SequenceOp(
+        transform.FailurePropagationMode.Propagate,
+        [],
+        transform.OperationType.get("xegpu.create_nd_tdesc"),
+    )
+    with InsertionPoint(sequence.body):
+        xegpu.set_desc_layout(
+            sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], order=[0, 1]
+        )
+        transform.YieldOp()
+    # CHECK-LABEL: TEST: setDescLayoutOrder
+    # CHECK: %0 = transform.xegpu.set_desc_layout %
+    # CHECK: sg_layout = [6, 4]
+    # CHECK: sg_data = [32, 16]
+    # CHECK: order = [0, 1]
+
 
 @run
 def setOpLayoutAttrOperandMinimal():
@@ -163,6 +181,33 @@ def setOpLayoutAttrResultSlice():
     # CHECK: inst_data = [8, 16]
     # CHECK: slice_dims = [0]
 
+@run
+def setOpLayoutAttrResultOrder():
+    sequence = transform.SequenceOp(
+        transform.FailurePropagationMode.Propagate,
+        [],
+        transform.OperationType.get("xegpu.dpas"),
+    )
+    with InsertionPoint(sequence.body):
+        xegpu.set_op_layout_attr(
+            sequence.bodyTarget,
+            index=0,
+            sg_layout=[6, 4],
+            sg_data=[32, 16],
+            inst_data=[8, 16],
+            order=[0, 1],
+            result=True,
+        )
+        transform.YieldOp()
+    # CHECK-LABEL: TEST: setOpLayoutAttrResultOrder
+    # CHECK: transform.xegpu.set_op_layout_attr %
+    # CHECK: result
+    # CHECK-NOT: index = 0
+    # CHECK: sg_layout = [6, 4]
+    # CHECK: sg_data = [32, 16]
+    # CHECK: inst_data = [8, 16]
+    # CHECK: order = [0, 1]
+
 
 @run
 def setOpLayoutAttrAnchor():

>From 71dacc01ad7ce44687ea9f50498b37e2803e73c2 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Thu, 5 Mar 2026 22:34:30 +0000
Subject: [PATCH 3/5] save

---
 mlir/test/python/dialects/transform_xegpu_ext.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index afb8ef9514354..99e9faa40d3b8 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -84,6 +84,7 @@ def setDescLayoutSlice():
     # CHECK: sg_data = [32, 16]
     # CHECK: slice_dims = [0]
 
+
 @run
 def setDescLayoutOrder():
     sequence = transform.SequenceOp(
@@ -181,6 +182,7 @@ def setOpLayoutAttrResultSlice():
     # CHECK: inst_data = [8, 16]
     # CHECK: slice_dims = [0]
 
+
 @run
 def setOpLayoutAttrResultOrder():
     sequence = transform.SequenceOp(

>From ec9f815ea39a9975a2f7f852e1943ac2776699e1 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 6 Mar 2026 22:34:38 +0000
Subject: [PATCH 4/5] address comments

---
 .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td       |  9 ---
 .../XeGPU/TransformOps/XeGPUTransformOps.td   | 26 +++++---
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 64 ++++++++-----------
 mlir/python/mlir/dialects/transform/xegpu.py  | 24 ++++---
 mlir/test/Dialect/XeGPU/transform-ops.mlir    | 12 ++--
 .../python/dialects/transform_xegpu_ext.py    |  4 ++
 6 files changed, 70 insertions(+), 69 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index fd48411e51b02..6f667f4801673 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -494,15 +494,6 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
       return 0;
     }
 
-    /// Derive a new layout with the same sg_layout, sg_data, inst_data,
-    /// lane_layout and lane_data but different order.
-    LayoutAttr cloneWithOrder(DenseI32ArrayAttr newOrder) const {
-      assert(getRank() == static_cast<int64_t>(newOrder.size())
-        && "The size of new order must match the layout rank.");
-      return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), getInstData(),
-                             getLaneLayout(), getLaneData(), newOrder);
-    }
-
     LayoutAttr dropSgLayoutAndData() const{
       // avoid every field of the attribute is nullptr, which may lead to segment fault
       if (!getInstData() && !getLaneLayout())
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index bee7eea79dce9..f7f45508b6a03 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -58,8 +58,8 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
-                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order
+                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
+                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims
                    );
 
   let results = (outs TransformHandleTypeInterface:$transformed);
@@ -68,8 +68,8 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
                    "ArrayRef<OpFoldResult>":$mixedSgLayout,
                    "ArrayRef<OpFoldResult>":$mixedSgData,
                    "ArrayRef<OpFoldResult>":$mixedInstData,
-                   "ArrayRef<int64_t>":$sliceDims,
-                   "ArrayRef<int32_t>":$order
+                   "ArrayRef<int32_t>":$order,
+                   "ArrayRef<int64_t>":$sliceDims
                    )>,
   ];
 
@@ -78,8 +78,8 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
     `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
     `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
     (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
-    (`slice_dims` `=` $slice_dims^)?
     (`order` `=` $order^)?
+    (`slice_dims` `=` $slice_dims^)?
     attr-dict `:` functional-type(operands, results)
   }];
 
@@ -130,8 +130,8 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
                    DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
+                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
                    DefaultValuedAttr<UnitAttr, "false">:$result,
                    DefaultValuedAttr<UnitAttr, "false">:$operand
                    );
@@ -143,8 +143,8 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
                    "ArrayRef<OpFoldResult>":$mixedSgLayout,
                    "ArrayRef<OpFoldResult>":$mixedSgData,
                    "ArrayRef<OpFoldResult>":$mixedInstData,
-                   "ArrayRef<int64_t>":$sliceDims,
                    "ArrayRef<int32_t>":$order,
+                   "ArrayRef<int64_t>":$sliceDims,
                    CArg<"bool", "false">:$result,
                    CArg<"bool", "false">:$operand
                    )>,
@@ -155,8 +155,8 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
     `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
     `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
     (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
-    (`slice_dims` `=` $slice_dims^)?
     (`order` `=` $order^)?
+    (`slice_dims` `=` $slice_dims^)?
     attr-dict `:` qualified(type(operands))
   }];
 
@@ -287,12 +287,14 @@ def ConvertLayoutOp : Op<Transform_Dialect, "xegpu.convert_layout", [
                    Variadic<TransformAnyParamTypeOrAnyHandle>:$target_sg_layout,
                    Variadic<TransformAnyParamTypeOrAnyHandle>:$target_sg_data,
                    Variadic<TransformAnyParamTypeOrAnyHandle>:$target_inst_data,
+                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$input_order,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_input_sg_layout,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_input_sg_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_input_inst_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_target_sg_layout,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_target_sg_data,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_target_inst_data
+                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_target_inst_data,
+                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$target_order
                    );
 
   let results = (outs TransformHandleTypeInterface:$newConvertOp);
@@ -301,9 +303,11 @@ def ConvertLayoutOp : Op<Transform_Dialect, "xegpu.convert_layout", [
                    "ArrayRef<OpFoldResult>":$mixedInputSgLayout,
                    "ArrayRef<OpFoldResult>":$mixedInputSgData,
                    "ArrayRef<OpFoldResult>":$mixedInputInstData,
+                   "ArrayRef<int32_t>":$inputOrder,
                    "ArrayRef<OpFoldResult>":$mixedTargetSgLayout,
                    "ArrayRef<OpFoldResult>":$mixedTargetSgData,
-                   "ArrayRef<OpFoldResult>":$mixedTargetInstData
+                   "ArrayRef<OpFoldResult>":$mixedTargetInstData,
+                   "ArrayRef<int32_t>":$targetOrder
                    )>,
   ];
 
@@ -312,9 +316,11 @@ def ConvertLayoutOp : Op<Transform_Dialect, "xegpu.convert_layout", [
     `input_sg_layout` `=` custom<DynamicIndexList>($input_sg_layout, $static_input_sg_layout)
     `input_sg_data` `=` custom<DynamicIndexList>($input_sg_data, $static_input_sg_data)
     (`input_inst_data` `=` custom<DynamicIndexList>($input_inst_data, $static_input_inst_data)^)?
+    (`input_order` `=` $input_order^)?
     `target_sg_layout` `=` custom<DynamicIndexList>($target_sg_layout, $static_target_sg_layout)
     `target_sg_data` `=` custom<DynamicIndexList>($target_sg_data, $static_target_sg_data)
     (`target_inst_data` `=` custom<DynamicIndexList>($target_inst_data, $static_target_inst_data)^)?
+    (`target_order` `=` $target_order^)?
     attr-dict `:` functional-type(operands, results)
   }];
 
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 58b103614454c..649faf57026e9 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -123,17 +123,16 @@ static std::optional<T> findProducerOfType(Value val) {
 }
 
 /// Create a layout attribute from the given parameters.
-static xegpu::LayoutAttr
-createLayoutAttr(MLIRContext *ctx, ArrayRef<int32_t> sgLayout,
-                 ArrayRef<int32_t> sgData,
-                 std::optional<ArrayRef<int32_t>> instData) {
+static xegpu::LayoutAttr createLayoutAttr(
+    MLIRContext *ctx, ArrayRef<int32_t> sgLayout, ArrayRef<int32_t> sgData,
+    std::optional<ArrayRef<int32_t>> instData, ArrayRef<int32_t> order) {
   return xegpu::LayoutAttr::get(
       ctx, DenseI32ArrayAttr::get(ctx, sgLayout),
       DenseI32ArrayAttr::get(ctx, sgData),
       instData ? DenseI32ArrayAttr::get(ctx, instData.value()) : nullptr,
       /*lane_layout=*/nullptr,
       /*lane_data=*/nullptr,
-      /*order=*/nullptr);
+      /*order=*/order.empty() ? nullptr : DenseI32ArrayAttr::get(ctx, order));
 }
 
 /// Generate `xegpu::LayoutAttr` from op mixed layout values.
@@ -143,6 +142,7 @@ getLayoutAttrFromOperands(MLIRContext *ctx, transform::TransformState &state,
                           ArrayRef<::mlir::OpFoldResult> mixedSgLayout,
                           ArrayRef<::mlir::OpFoldResult> mixedSgData,
                           ArrayRef<::mlir::OpFoldResult> mixedInstData,
+                          ArrayRef<int32_t> order,
                           xegpu::LayoutAttr &layoutAttr) {
   SmallVector<int32_t> sgLayout, sgData, instData;
   auto status =
@@ -161,7 +161,7 @@ getLayoutAttrFromOperands(MLIRContext *ctx, transform::TransformState &state,
                            ? std::nullopt
                            : std::optional<ArrayRef<int32_t>>(instData);
 
-  layoutAttr = createLayoutAttr(ctx, sgLayout, sgData, maybeInstData);
+  layoutAttr = createLayoutAttr(ctx, sgLayout, sgData, maybeInstData, order);
 
   return DiagnosedSilenceableFailure::success();
 }
@@ -216,8 +216,8 @@ void transform::SetDescLayoutOp::build(OpBuilder &builder,
                                        ArrayRef<OpFoldResult> mixedSgLayout,
                                        ArrayRef<OpFoldResult> mixedSgData,
                                        ArrayRef<OpFoldResult> mixedInstData,
-                                       ArrayRef<int64_t> sliceDims,
-                                       ArrayRef<int32_t> order) {
+                                       ArrayRef<int32_t> order,
+                                       ArrayRef<int64_t> sliceDims) {
   SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
   SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
   dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
@@ -231,8 +231,8 @@ void transform::SetDescLayoutOp::build(OpBuilder &builder,
         /*static_sg_layout=*/staticSgLayout,
         /*static_sg_data=*/staticSgData,
         /*static_inst_data=*/staticInstData,
-        /*slice_dims=*/sliceDims,
-        /*order=*/order);
+        /*order=*/order,
+        /*slice_dims=*/sliceDims);
 }
 
 DiagnosedSilenceableFailure
@@ -247,18 +247,12 @@ transform::SetDescLayoutOp::apply(transform::TransformRewriter &rewriter,
   Operation *target = *targetOps.begin();
 
   xegpu::LayoutAttr layoutAttr = nullptr;
-  auto status = getLayoutAttrFromOperands(getContext(), state, (*this),
-                                          getMixedSgLayout(), getMixedSgData(),
-                                          getMixedInstData(), layoutAttr);
+  auto status = getLayoutAttrFromOperands(
+      getContext(), state, (*this), getMixedSgLayout(), getMixedSgData(),
+      getMixedInstData(), getOrder(), layoutAttr);
   if (!status.succeeded())
     return status;
 
-  // If order is provided, clone the layout with the provided order.
-  auto order = getOrder();
-  if (order.size() > 0)
-    layoutAttr =
-        layoutAttr.cloneWithOrder(DenseI32ArrayAttr::get(getContext(), order));
-
   xegpu::DistributeLayoutAttr layout = layoutAttr;
   auto sliceDims = getSliceDims();
   if (sliceDims.size() > 0) {
@@ -299,8 +293,8 @@ void transform::SetDescLayoutOp::getEffects(
 void transform::SetOpLayoutAttrOp::build(
     OpBuilder &builder, OperationState &ostate, Value target, int64_t index,
     ArrayRef<OpFoldResult> mixedSgLayout, ArrayRef<OpFoldResult> mixedSgData,
-    ArrayRef<OpFoldResult> mixedInstData, ArrayRef<int64_t> sliceDims,
-    ArrayRef<int32_t> order, bool result, bool operand) {
+    ArrayRef<OpFoldResult> mixedInstData, ArrayRef<int32_t> order,
+    ArrayRef<int64_t> sliceDims, bool result, bool operand) {
   SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
   SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
   dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
@@ -315,8 +309,8 @@ void transform::SetOpLayoutAttrOp::build(
         /*static_sg_layout=*/staticSgLayout,
         /*static_sg_data=*/staticSgData,
         /*static_inst_data=*/staticInstData,
-        /*slice_dims=*/sliceDims,
         /*order=*/order,
+        /*slice_dims=*/sliceDims,
         /*result=*/result,
         /*operand=*/operand);
 }
@@ -346,18 +340,12 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
   }
 
   xegpu::LayoutAttr layoutAttr = nullptr;
-  auto status = getLayoutAttrFromOperands(getContext(), state, (*this),
-                                          getMixedSgLayout(), getMixedSgData(),
-                                          getMixedInstData(), layoutAttr);
+  auto status = getLayoutAttrFromOperands(
+      getContext(), state, (*this), getMixedSgLayout(), getMixedSgData(),
+      getMixedInstData(), getOrder(), layoutAttr);
   if (!status.succeeded())
     return status;
 
-  // If order is provided, clone the layout with the provided order.
-  auto order = getOrder();
-  if (order.size() > 0)
-    layoutAttr =
-        layoutAttr.cloneWithOrder(DenseI32ArrayAttr::get(getContext(), order));
-
   xegpu::DistributeLayoutAttr layout = layoutAttr;
   auto sliceDims = getSliceDims();
   if (sliceDims.size() > 0) {
@@ -616,10 +604,10 @@ void transform::ConvertLayoutOp::build(
     OpBuilder &builder, OperationState &ostate, Value target,
     ArrayRef<OpFoldResult> mixedInputSgLayout,
     ArrayRef<OpFoldResult> mixedInputSgData,
-    ArrayRef<OpFoldResult> mixedInputInstData,
+    ArrayRef<OpFoldResult> mixedInputInstData, ArrayRef<int32_t> inputOrder,
     ArrayRef<OpFoldResult> mixedTargetSgLayout,
     ArrayRef<OpFoldResult> mixedTargetSgData,
-    ArrayRef<OpFoldResult> mixedTargetInstData) {
+    ArrayRef<OpFoldResult> mixedTargetInstData, ArrayRef<int32_t> targetOrder) {
   SmallVector<int64_t> staticInputSgLayout, staticInputSgData,
       staticInputInstData;
   SmallVector<Value> dynamicInputSgLayout, dynamicInputSgData,
@@ -648,12 +636,14 @@ void transform::ConvertLayoutOp::build(
         /*target_sg_layout=*/dynamicTargetSgLayout,
         /*target_sg_data=*/dynamicTargetSgData,
         /*target_inst_data=*/dynamicTargetInstData,
+        /*input_order=*/inputOrder,
         /*static_input_sg_layout=*/staticInputSgLayout,
         /*static_input_sg_data=*/staticInputSgData,
         /*static_input_inst_data=*/staticInputInstData,
         /*static_target_sg_layout=*/staticTargetSgLayout,
         /*static_target_sg_data=*/staticTargetSgData,
-        /*static_target_inst_data=*/staticTargetInstData);
+        /*static_target_inst_data=*/staticTargetInstData,
+        /*target_order=*/targetOrder);
 }
 
 DiagnosedSilenceableFailure
@@ -671,14 +661,16 @@ transform::ConvertLayoutOp::apply(transform::TransformRewriter &rewriter,
   xegpu::LayoutAttr inputLayoutAttr = nullptr;
   auto status = getLayoutAttrFromOperands(
       getContext(), state, (*this), getMixedInputSgLayout(),
-      getMixedInputSgData(), getMixedInputInstData(), inputLayoutAttr);
+      getMixedInputSgData(), getMixedInputInstData(), getInputOrder(),
+      inputLayoutAttr);
   if (!status.succeeded())
     return status;
 
   xegpu::LayoutAttr targetLayoutAttr = nullptr;
   status = getLayoutAttrFromOperands(
       getContext(), state, (*this), getMixedTargetSgLayout(),
-      getMixedTargetSgData(), getMixedTargetInstData(), targetLayoutAttr);
+      getMixedTargetSgData(), getMixedTargetInstData(), getTargetOrder(),
+      targetLayoutAttr);
   if (!status.succeeded())
     return status;
 
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 03a5239dceff1..782c9a3f242a0 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -62,8 +62,8 @@ def __init__(
         sg_data: MixedValues,
         *,
         inst_data: Optional[MixedValues] = None,
-        slice_dims: Optional[MixedInt] = None,
         order: Optional[MixedInt] = None,
+        slice_dims: Optional[MixedInt] = None,
         loc=None,
         ip=None,
     ):
@@ -94,8 +94,8 @@ def __init__(
             static_sg_layout=static_sg_layout,
             static_sg_data=static_sg_data,
             static_inst_data=static_inst_data,
-            slice_dims=slice_dims,
             order=order,
+            slice_dims=slice_dims,
             loc=loc,
             ip=ip,
         )
@@ -107,8 +107,8 @@ def set_desc_layout(
     sg_data: MixedValues,
     *,
     inst_data: Optional[MixedValues] = None,
-    slice_dims: Optional[MixedInt] = None,
     order: Optional[MixedInt] = None,
+    slice_dims: Optional[MixedInt] = None,
     loc=None,
     ip=None,
 ) -> OpResult:
@@ -117,8 +117,8 @@ def set_desc_layout(
         sg_layout,
         sg_data,
         inst_data=inst_data,
-        slice_dims=slice_dims,
         order=order,
+        slice_dims=slice_dims,
         loc=loc,
         ip=ip,
     ).result
@@ -135,8 +135,8 @@ def __init__(
         sg_data: MixedValues,
         *,
         inst_data: Optional[MixedValues] = None,
-        slice_dims: Optional[MixedInt] = None,
         order: Optional[MixedInt] = None,
+        slice_dims: Optional[MixedInt] = None,
         index: Optional[Union[int, Attribute]] = None,
         result: Optional[Union[bool, Attribute]] = None,
         operand: Optional[Union[bool, Attribute]] = None,
@@ -167,8 +167,8 @@ def __init__(
             static_sg_layout=static_sg_layout,
             static_sg_data=static_sg_data,
             static_inst_data=static_inst_data,
-            slice_dims=slice_dims,
             order=order,
+            slice_dims=slice_dims,
             index=index,
             result=result,
             operand=operand,
@@ -183,8 +183,8 @@ def set_op_layout_attr(
     sg_data: MixedValues,
     *,
     inst_data: Optional[MixedValues] = None,
-    slice_dims: Optional[MixedInt] = None,
     order: Optional[MixedInt] = None,
+    slice_dims: Optional[MixedInt] = None,
     index: Optional[Union[int, Attribute]] = None,
     result: Optional[Union[bool, Attribute]] = None,
     operand: Optional[Union[bool, Attribute]] = None,
@@ -196,8 +196,8 @@ def set_op_layout_attr(
         sg_layout,
         sg_data,
         inst_data=inst_data,
-        slice_dims=slice_dims,
         order=order,
+        slice_dims=slice_dims,
         index=index,
         result=result,
         operand=operand,
@@ -298,6 +298,8 @@ def __init__(
         *,
         input_inst_data: Optional[MixedValues] = None,
         target_inst_data: Optional[MixedValues] = None,
+        input_order: Optional[MixedInt] = None,
+        target_order: Optional[MixedInt] = None,
         loc=None,
         ip=None,
     ):
@@ -342,12 +344,14 @@ def __init__(
             dynamic_target_sg_layout,
             dynamic_target_sg_data,
             dynamic_target_inst_data,
+            input_order=input_order,
             static_input_sg_layout=static_input_sg_layout,
             static_input_sg_data=static_input_sg_data,
             static_input_inst_data=static_input_inst_data,
             static_target_sg_layout=static_target_sg_layout,
             static_target_sg_data=static_target_sg_data,
             static_target_inst_data=static_target_inst_data,
+            target_order=target_order,
             loc=loc,
             ip=ip,
         )
@@ -362,6 +366,8 @@ def convert_layout(
     *,
     input_inst_data: Optional[MixedValues] = None,
     target_inst_data: Optional[MixedValues] = None,
+    input_order: Optional[MixedInt] = None,
+    target_order: Optional[MixedInt] = None,
     loc=None,
     ip=None,
 ) -> ConvertLayoutOp:
@@ -373,6 +379,8 @@ def convert_layout(
         target_sg_data,
         input_inst_data=input_inst_data,
         target_inst_data=target_inst_data,
+        input_order=input_order,
+        target_order=target_order,
         loc=loc,
         ip=ip,
     ).result
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 5b2dc47246463..5bb1ab708e301 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -575,12 +575,12 @@ module attributes {transform.with_named_sequence} {
 func.func @convert_layout_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
   %c0 = arith.constant 0 : index
   // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [32, 16]>>
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [32, 16], order = [1, 0]>>
   // CHECK: %[[V1:.+]] = xegpu.load_nd %[[V0]]
-  %1 = xegpu.load_nd %0[%c0, %c0]  : !xegpu.tensor_desc<256x32xf16, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [32, 16]>> -> vector<256x32xf16>
+  %1 = xegpu.load_nd %0[%c0, %c0]  : !xegpu.tensor_desc<256x32xf16, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [32, 16], order = [1, 0]>> -> vector<256x32xf16>
   // CHECK: %[[V2:.+]] = xegpu.convert_layout %[[V1]]
-  // CHECK: input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [32, 16]>
-  // CHECK: target_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>
+  // CHECK: input_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [32, 16], order = [1, 0]>
+  // CHECK: target_layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16], order = [1, 0]>
   %2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
   %3 = xegpu.load_nd %2[%c0, %c0]  : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
   %4 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
@@ -596,8 +596,8 @@ module attributes {transform.with_named_sequence} {
     %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
     // CHECK: transform.xegpu.convert_layout %{{.*}}
     transform.xegpu.convert_layout %1
-      input_sg_layout = [8, 4] input_sg_data = [32, 32] input_inst_data = [32, 16]
-      target_sg_layout = [8, 4] target_sg_data = [32, 32] target_inst_data = [8, 16]
+      input_sg_layout = [8, 4] input_sg_data = [32, 32] input_inst_data = [32, 16] input_order = [1, 0]
+      target_sg_layout = [8, 4] target_sg_data = [32, 32] target_inst_data = [8, 16] target_order = [1, 0]
       : (!transform.any_value) -> !transform.any_op
     transform.yield
   }
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index 99e9faa40d3b8..346e68eca9201 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -356,9 +356,11 @@ def ConvertLayout():
             input_sg_layout=[6, 4],
             input_sg_data=[32, 32],
             input_inst_data=[32, 16],
+            input_order=[1, 0],
             target_sg_layout=[6, 4],
             target_sg_data=[32, 32],
             target_inst_data=[8, 16],
+            target_order=[0, 1],
         )
         transform.YieldOp()
     # CHECK-LABEL: TEST: ConvertLayout
@@ -366,6 +368,8 @@ def ConvertLayout():
     # CHECK: input_sg_layout = [6, 4]
     # CHECK: input_sg_data = [32, 32]
     # CHECK: input_inst_data = [32, 16]
+    # CHECK: input_order = [1, 0]
     # CHECK: target_sg_layout = [6, 4]
     # CHECK: target_sg_data = [32, 32]
     # CHECK: target_inst_data = [8, 16]
+    # CHECK: target_order = [0, 1]

>From f148bc8509c467a7b7d1bc2e7b5aa79e5c1811d3 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 6 Mar 2026 22:51:02 +0000
Subject: [PATCH 5/5] address comments

---
 mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 649faf57026e9..39f9ae0bf1287 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -12,7 +12,6 @@
 #include "mlir/Dialect/SCF/Utils/Utils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
-#include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/ADT/SmallVectorExtras.h"
 
 #include <optional>



More information about the Mlir-commits mailing list