[Mlir-commits] [mlir] [mlir][xegpu] Add support for accessing the default order of a layout. (PR #184451)
Charitha Saumya
llvmlistbot at llvm.org
Tue Mar 3 14:58:42 PST 2026
https://github.com/charithaintc created https://github.com/llvm/llvm-project/pull/184451
Currently, `getOrder` returns null if the user does not provide an `order` in an XeGPU layout. This behavior is undesirable when coupled with utility functions that work on top of layouts (like `isTransposeOf`). This PR introduces `getEffectiveOrderAsInt`, which always returns the effective order, even if the user omits it.
>From 6ec9307dc6be5a2de7f5bb8931522fa90dae997b Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 3 Mar 2026 22:55:16 +0000
Subject: [PATCH] add fix
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 61 ++++++++++++++-----
.../XeGPU/subgroup-distribute-unit.mlir | 4 +-
.../Dialect/XeGPU/subgroup-distribute.mlir | 8 +--
3 files changed, 52 insertions(+), 21 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 377967dfdb1e5..6f667f4801673 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -217,6 +217,9 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
InterfaceMethod<"Get the effective LaneData of the layout attribute as integer array",
"SmallVector<int64_t>",
"getEffectiveLaneDataAsInt">,
+ InterfaceMethod<"Get the effective order of the layout attribute as integer array",
+ "SmallVector<int64_t>",
+ "getEffectiveOrderAsInt">,
InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData",
"xegpu::DistributeLayoutAttr",
"dropSgLayoutAndData">,
@@ -236,15 +239,15 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
"FailureOr<SmallVector<Value>>",
"delinearizeId",
(ins "OpBuilder &": $builder, "Location":$loc, "Value":$linearId)>,
- InterfaceMethod<[{Derive a new layout with sg_data, inst_data and lane_data set to the
- specified values for the given dimension. Passing -1 for any parameter
+ InterfaceMethod<[{Derive a new layout with sg_data, inst_data and lane_data set to the
+ specified values for the given dimension. Passing -1 for any parameter
preserves its original value.}],
"xegpu::DistributeLayoutAttr",
"setDimData",
(ins "int64_t": $dim,
"int64_t": $sgData,
"int64_t": $instData,
- "int64_t": $laneData)>,
+ "int64_t": $laneData)>,
InterfaceMethod<[{Derive a new layout by collapsing dimensions.
`dimGroup` specifies a group of adjacent dimensions that are collapsed into
a single dimension in the derived layout.}],
@@ -297,13 +300,10 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
// Check laneData
if (!checkTranspose($_self.getEffectiveLaneDataAsInt(), other.getEffectiveLaneDataAsInt(), perm))
return false;
- // Check order if both sides have order field.
- if ($_self.getOrder() && other.getOrder()) {
- auto thisOrderAsInt = llvm::to_vector_of<int64_t>($_self.getOrder().asArrayRef());
- auto otherOrderAsInt = llvm::to_vector_of<int64_t>(other.getOrder().asArrayRef());
- if (!checkTranspose(thisOrderAsInt, otherOrderAsInt, perm))
- return false;
- }
+ // Check order
+ if (!checkTranspose($_self.getEffectiveOrderAsInt(), other.getEffectiveOrderAsInt(), perm))
+ return false;
+
return true;
}]>,
InterfaceMethod</*desc=*/[{Check if this layout is a slice of another layout.}],
@@ -484,7 +484,7 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
return !isForWorkgroup();
}
- int64_t getRank() {
+ int64_t getRank() const {
if (auto attr = getSgLayout())
return attr.size();
if (auto attr = getInstData())
@@ -540,14 +540,25 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
return {};
}
+ // Returns the order as integers. If order is not set, returns the default
+ // value [rank-1, ..., 0] (row-major, last dim is fastest).
+ SmallVector<int64_t> getEffectiveOrderAsInt() const {
+ if (DenseI32ArrayAttr order = getOrder())
+ return llvm::to_vector_of<int64_t>(order.asArrayRef());
+ int64_t rank = getRank();
+ SmallVector<int64_t> defaultOrder(rank);
+ std::iota(defaultOrder.rbegin(), defaultOrder.rend(), 0);
+ return defaultOrder;
+ }
+
//set the layout for the sepcified unit dims: sg_data, inst_data and lane_data to 1
DistributeLayoutAttr setUnitDimData(SmallVector<int64_t> unitDims) const;
//set the layout for the sepcified unit dims: sg_lane and lane_layout to 1
DistributeLayoutAttr setUnitDimLayout(SmallVector<int64_t> unitDims) const;
- // Derive a new layout with sg_data, inst_data and lane_data set to the
- // specified values for the given dimension. Passing -1 for any parameter
+ // Derive a new layout with sg_data, inst_data and lane_data set to the
+ // specified values for the given dimension. Passing -1 for any parameter
// preserves its original value.
DistributeLayoutAttr setDimData(int64_t dim, int64_t sgData, int64_t instData, int64_t laneData);
@@ -702,6 +713,26 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
return {};
}
+ /// Returns the effective order of the attribute. The parent's effective
+ /// order (or default [rank-1,...,0]) is filtered by removing entries that
+ /// correspond to sliced dimensions, then renumbered to form a valid
+ /// permutation over the remaining dimensions.
+ /// Example: parent order = [2, 0, 1, 3], slice dims = [0, 2] -> result = [0, 1]
+ SmallVector<int64_t> getEffectiveOrderAsInt() const {
+ SliceAttr attr = flatten();
+ auto parent = dyn_cast<LayoutAttr>(attr.getParent());
+ auto order = parent.getEffectiveOrderAsInt();
+ ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
+ SmallVector<int64_t> result;
+ for (int64_t d : order) {
+ if (llvm::is_contained(dims, d))
+ continue;
+ int64_t offset = llvm::count_if(dims, [&](int64_t s) { return s < d; });
+ result.push_back(d - offset);
+ }
+ return result;
+ }
+
SliceAttr dropSgLayoutAndData() const{
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
@@ -726,8 +757,8 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
//set the layout for the sepcified unit dims: sg_lane and lane_layout to 1
DistributeLayoutAttr setUnitDimLayout(SmallVector<int64_t> unitDims) const;
- // Derive a new layout with sg_data, inst_data and lane_data set to the
- // specified values for the given dimension. Passing -1 for any parameter
+ // Derive a new layout with sg_data, inst_data and lane_data set to the
+ // specified values for the given dimension. Passing -1 for any parameter
// preserves its original value.
DistributeLayoutAttr setDimData(int64_t dim, int64_t sgData, int64_t instData, int64_t laneData);
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
index 31bb6704eece9..d6cd6206d7f69 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute-unit.mlir
@@ -567,11 +567,11 @@ gpu.func @create_memdesc(%laneid: index, %arg0 : memref<2048xi8, 3>) {
gpu.func @vector_transpose(%laneid: index) {
%r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x1xf32>) {
%cst = "some_op"()
- {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
+ {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>}
: () -> (vector<16x2xf32>)
%transpose = vector.transpose %cst, [1, 0]
{
- layout_operand_0 = #xegpu.layout<lane_layout = [16 , 1], lane_data = [1, 1]>,
+ layout_operand_0 = #xegpu.layout<lane_layout = [16 , 1], lane_data = [1, 1], order = [0, 1]>,
layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
}
: vector<16x2xf32> to vector<2x16xf32>
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index 5cf4ae64a0fd4..b8c92ec8d6f87 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -201,10 +201,10 @@ gpu.module @xevm_module{
%1 = xegpu.load_nd %0[%c0, %c0] {layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
: !xegpu.tensor_desc<8x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8x16xf16>
%2 = xegpu.create_nd_tdesc %arg1 : memref<16x8xi32>
- -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>>
- %3 = xegpu.load_nd %2[%c0, %c0] {layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
- : !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>> -> vector<16x8xi32>
- %4 = vector.bitcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>}
+ -> !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>>
+ %3 = xegpu.load_nd %2[%c0, %c0] {layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>}
+ : !xegpu.tensor_desc<16x8xi32, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1], order = [0, 1]>> -> vector<16x8xi32>
+ %4 = vector.bitcast %3 {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2], order = [0, 1]>}
: vector<16x8xi32> to vector<16x16xf16>
%5 = vector.transpose %4, [1, 0] {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>}
: vector<16x16xf16> to vector<16x16xf16>
More information about the Mlir-commits
mailing list