[Mlir-commits] [mlir] [mlir][xegpu] Add support for accessing the default order of a layout. (PR #184451)

Tue Mar 3 14:58:42 PST 2026

https://github.com/charithaintc created https://github.com/llvm/llvm-project/pull/184451

Currently, `getOrder` returns null if the user does not provide an `order` in an XeGPU layout. This behavior is undesirable when coupled with utility functions that work on top of layouts (like `isTransposeOf`). This PR introduces `getEffectiveOrderAsInt`, which always returns the true order, even if the user omits it.

>From 6ec9307dc6be5a2de7f5bb8931522fa90dae997b Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 3 Mar 2026 22:55:16 +0000
Subject: [PATCH] add fix

---
 .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td       | 61 ++++++++++++++-----
 .../XeGPU/subgroup-distribute-unit.mlir       |  4 +-
 .../Dialect/XeGPU/subgroup-distribute.mlir    |  8 +--
 3 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 377967dfdb1e5..6f667f4801673 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -217,6 +217,9 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
     InterfaceMethod<"Get the effective LaneData of the layout attribute as integer array",
                     "SmallVector<int64_t>",
                     "getEffectiveLaneDataAsInt">,
+    InterfaceMethod<"Get the effective order of the layout attribute as integer array",
+                    "SmallVector<int64_t>",
+                    "getEffectiveOrderAsInt">,
     InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData",
                     "xegpu::DistributeLayoutAttr",
                     "dropSgLayoutAndData">,
@@ -236,15 +239,15 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
                     "FailureOr<SmallVector<Value>>",
                     "delinearizeId",
                     (ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId)>,
-    InterfaceMethod<[{Derive a new layout with sg_data, inst_data and lane_data set to the 
-                      specified values for the given dimension. Passing -1 for any parameter 
+    InterfaceMethod<[{Derive a new layout with sg_data, inst_data and lane_data set to the
+                      specified values for the given dimension. Passing -1 for any parameter
                       preserves its original value.}],
                     "xegpu::DistributeLayoutAttr",
                     "setDimData",
                     (ins "int64_t": $dim,
                           "int64_t": $sgData,
                           "int64_t": $instData,
-                          "int64_t": $laneData)>,              
+                          "int64_t": $laneData)>,
     InterfaceMethod<[{Derive a new layout by collapsing dimensions.
                       `dimGroup` specifies a group of adjacent dimensions that are collapsed into
                        a single dimension in the derived layout.}],
@@ -297,13 +300,10 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
                       // Check laneData
                       if (!checkTranspose($_self.getEffectiveLaneDataAsInt(), other.getEffectiveLaneDataAsInt(), perm))
                         return false;
-                      // Check order if both sides have order field.
-                      if ($_self.getOrder() && other.getOrder()) {
-                        auto thisOrderAsInt = llvm::to_vector_of<int64_t>($_self.getOrder().asArrayRef());
-                        auto otherOrderAsInt = llvm::to_vector_of<int64_t>(other.getOrder().asArrayRef());
-                        if (!checkTranspose(thisOrderAsInt, otherOrderAsInt, perm))
-                          return false;
-                      }
+                      // Check order
+                      if (!checkTranspose($_self.getEffectiveOrderAsInt(), other.getEffectiveOrderAsInt(), perm))
+                        return false;
+
                       return true;
                     }]>,
     InterfaceMethod</*desc=*/[{Check if this layout is a slice of another layout.}],
@@ -484,7 +484,7 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
       return !isForWorkgroup();
     }
 
-    int64_t getRank() {
+    int64_t getRank() const {
       if (auto attr = getSgLayout())
         return attr.size();
       if (auto attr = getInstData())
@@ -540,14 +540,25 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
       return {};
     }
 
+    // Returns the order as int64_t values. If `order` is not set, returns the
+    // default value [rank-1, ..., 0] (row-major, last dim is fastest).
+    SmallVector<int64_t> getEffectiveOrderAsInt() const {
+      if (DenseI32ArrayAttr order = getOrder())
+        return llvm::to_vector_of<int64_t>(order.asArrayRef());
+      int64_t rank = getRank();
+      SmallVector<int64_t> defaultOrder(rank);
+      std::iota(defaultOrder.rbegin(), defaultOrder.rend(), 0); // reverse fill: [rank-1, ..., 0]
+      return defaultOrder;
+    }
+
     //set the layout for the sepcified unit dims: sg_data, inst_data and lane_data to 1
     DistributeLayoutAttr setUnitDimData(SmallVector<int64_t> unitDims) const;
 
     //set the layout for the sepcified unit dims: sg_lane and lane_layout to 1
     DistributeLayoutAttr setUnitDimLayout(SmallVector<int64_t> unitDims) const;
 
-    // Derive a new layout with sg_data, inst_data and lane_data set to the 
-    // specified values for the given dimension. Passing -1 for any parameter 
+    // Derive a new layout with sg_data, inst_data and lane_data set to the
+    // specified values for the given dimension. Passing -1 for any parameter
     // preserves its original value.
     DistributeLayoutAttr setDimData(int64_t dim, int64_t sgData, int64_t instData, int64_t laneData);
 
@@ -702,6 +713,26 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
       return {};
     }
 
+    /// Returns the effective order of the attribute. The parent's effective
+    /// order (or default [rank-1,...,0]) is filtered by removing entries that
+    /// correspond to sliced dimensions, then renumbered to form a valid
+    /// permutation over the remaining dimensions.
+    /// Example: parent order = [2, 0, 1, 3], slice dims = [0, 2] -> result = [0, 1]
+    SmallVector<int64_t> getEffectiveOrderAsInt() const {
+      SliceAttr attr = flatten();
+      auto parent = dyn_cast<LayoutAttr>(attr.getParent());
+      auto order = parent.getEffectiveOrderAsInt();
+      ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
+      SmallVector<int64_t> result;
+      for (int64_t d : order) {
+        if (llvm::is_contained(dims, d))
+          continue;
+        int64_t offset = llvm::count_if(dims, [&](int64_t s) { return s < d; });
+        result.push_back(d - offset);
+      }
+      return result;
+    }
+
     SliceAttr dropSgLayoutAndData() const{
       SliceAttr attr = flatten();
       auto parent = dyn_cast<LayoutAttr>(attr.getParent());
@@ -726,8 +757,8 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
     //set the layout for the sepcified unit dims: sg_lane and lane_layout to 1
     DistributeLayoutAttr setUnitDimLayout(SmallVector<int64_t> unitDims) const;
 
-    // Derive a new layout with sg_data, inst_data and lane_data set to the 
-    // specified values for the given dimension. Passing -1 for any parameter 
+    // Derive a new layout with sg_data, inst_data and lane_data set to the
+    // specified values for the given dimension. Passing -1 for any parameter
     // preserves its original value.
     DistributeLayoutAttr setDimData(int64_t dim, int64_t sgData, int64_t instData, int64_t laneData);
 
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
index 31bb6704eece9..d6cd6206d7f69 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
@@ -567,11 +567,11 @@ gpu.func @create_memdesc(%laneid: index, %arg0 : memref<2048xi8, 3>) {
 gpu.func @vector_transpose(%laneid: index) {
   %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x1xf32>) {
     %cst = "some_op"()
-      {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+      {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>}
       : () -> (vector<16x2xf32>)
     %transpose = vector.transpose %cst, [1, 0]
       {
-        layout_operand_0 = #xegpu.layout<lane_layout = [16 , 1], lane_data = [1, 1]>,
+        layout_operand_0 = #xegpu.layout<lane_layout = [16 , 1], lane_data = [1, 1], order = [0, 1]>,
         layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
       }
       : vector<16x2xf32> to vector<2x16xf32>
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index 5cf4ae64a0fd4..b8c92ec8d6f87 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -201,10 +201,10 @@ gpu.module @xevm_module{
     %1 = xegpu.load_nd %0[%c0, %c0]  {layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
       : !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16>
     %2 = xegpu.create_nd_tdesc %arg1 : memref<16x8xi32>
-      -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
-    %3 = xegpu.load_nd %2[%c0, %c0]  {layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
-      : !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> -> vector<16x8xi32>
-    %4 = vector.bitcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>}
+      -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>>
+    %3 = xegpu.load_nd %2[%c0, %c0]  {layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>}
+      : !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>> -> vector<16x8xi32>
+    %4 = vector.bitcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2], order = [0, 1]>}
       : vector<16x8xi32> to vector<16x16xf16>
     %5 = vector.transpose %4, [1, 0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>}
       : vector<16x16xf16> to vector<16x16xf16>