[Mlir-commits] [mlir] [MLIR][XeGPU] Add sg layout propagation (PR #170879)
Artem Kroviakov
llvmlistbot at llvm.org
Tue Dec 16 08:16:01 PST 2025
https://github.com/akroviakov updated https://github.com/llvm/llvm-project/pull/170879
>From 98ad0818366dca893ef4b096e17934386338cc9a Mon Sep 17 00:00:00 2001
From: Artem Kroviakov <artem.kroviakov at intel.com>
Date: Fri, 5 Dec 2025 16:08:04 +0000
Subject: [PATCH 1/2] [MLIR][XeGPU] Add sg layout propagation
---
.../mlir/Dialect/XeGPU/Transforms/Passes.td | 6 +-
.../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 59 +++++++++++++++++--
.../Dialect/XeGPU/propagate-layout-sg.mlir | 53 +++++++++++++++++
3 files changed, 112 insertions(+), 6 deletions(-)
create mode 100644 mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index 0ca58426ecfcb..c682e6fdad1df 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -50,6 +50,10 @@ def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
- `lane`
Propagate the `lane_layout` and `lane_data` fields of the layout attribute.
Default values are selected to align with hardware.
+
+ - `sg`
+ Propagate the `sg_layout` and `sg_data` fields of the layout attribute.
+ Default values are selected to align with hardware.
}];
let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
"vector::VectorDialect"];
@@ -60,7 +64,7 @@ def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
Option<
"layoutKind", "layout-kind", "std::string",
/*default=*/"\"lane\"",
- "Propagate `inst` / `lane` level of xegpu layouts.">
+ "Propagate `sg` / `inst` / `lane` level of xegpu layouts.">
];
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index dc9eb96c169b4..a3d057981024d 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -53,7 +53,7 @@ using namespace mlir::dataflow;
namespace {
-enum class LayoutKind { Lane, InstData };
+enum class LayoutKind { Lane, InstData, Subgroup };
//===----------------------------------------------------------------------===//
// LayoutInfo
@@ -109,6 +109,12 @@ struct LayoutInfo {
SmallVector<int> getInstData() const;
+ SmallVector<int> getSgLayout() const;
+
+ SmallVector<int> getSgData() const;
+
+ SmallVector<int> getOrder() const;
+
bool isSliceLayout() const {
if (!isAssigned())
return false;
@@ -127,8 +133,6 @@ struct LayoutInfo {
SmallVector<int> LayoutInfo::getLaneLayout() const {
if (!isAssigned())
return {};
- assert(storage.getEffectiveLaneLayoutAsInt().size() &&
- "Expected lane layout to be assigned");
return llvm::map_to_vector(storage.getEffectiveLaneLayoutAsInt(),
[](int64_t val) { return static_cast<int>(val); });
}
@@ -136,8 +140,6 @@ SmallVector<int> LayoutInfo::getLaneLayout() const {
SmallVector<int> LayoutInfo::getLaneData() const {
if (!isAssigned())
return {};
- assert(storage.getEffectiveLaneDataAsInt().size() &&
- "Expected lane data to be assigned");
return llvm::map_to_vector(storage.getEffectiveLaneDataAsInt(),
[](int64_t val) { return static_cast<int>(val); });
}
@@ -149,6 +151,27 @@ SmallVector<int> LayoutInfo::getInstData() const {
[](int64_t val) { return static_cast<int>(val); });
}
+SmallVector<int> LayoutInfo::getSgLayout() const {
+ if (!isAssigned())
+ return {};
+ return llvm::map_to_vector(storage.getEffectiveSgLayoutAsInt(),
+ [](int64_t val) { return static_cast<int>(val); });
+}
+
+SmallVector<int> LayoutInfo::getSgData() const {
+ if (!isAssigned())
+ return {};
+ return llvm::map_to_vector(storage.getEffectiveSgDataAsInt(),
+ [](int64_t val) { return static_cast<int>(val); });
+}
+
+SmallVector<int> LayoutInfo::getOrder() const {
+ if (!isAssigned() || !storage.getOrder())
+ return {};
+ return llvm::map_to_vector(storage.getOrder().asArrayRef(),
+ [](int64_t val) { return static_cast<int>(val); });
+}
+
void LayoutInfo::print(raw_ostream &os) const {
if (isAssigned()) {
os << storage;
@@ -188,6 +211,10 @@ LayoutInfo LayoutInfo::transpose(ArrayRef<int64_t> permutation) const {
SmallVector<int32_t> laneLayout;
SmallVector<int32_t> laneData;
SmallVector<int32_t> instData;
+ SmallVector<int32_t> sgLayout;
+ SmallVector<int32_t> sgData;
+ SmallVector<int32_t> order;
+
for (int64_t idx : permutation) {
if (getLaneLayout().size()) {
laneLayout.push_back(static_cast<int32_t>(getLaneLayout()[idx]));
@@ -195,13 +222,30 @@ LayoutInfo LayoutInfo::transpose(ArrayRef<int64_t> permutation) const {
}
if (getInstData().size())
instData.push_back(static_cast<int32_t>(getInstData()[idx]));
+ if (getSgData().size()) {
+ sgLayout.push_back(static_cast<int32_t>(getSgLayout()[idx]));
+ sgData.push_back(static_cast<int32_t>(getSgData()[idx]));
+ }
+ if (getOrder().size()) {
+ order.push_back(static_cast<int32_t>(getOrder()[idx]));
+ }
}
+ auto orderAttr = order.size()
+ ? DenseI32ArrayAttr::get(storage.getContext(), order)
+ : nullptr;
xegpu::LayoutAttr layoutAttr;
if (getLaneLayout().size())
layoutAttr =
xegpu::LayoutAttr::get(storage.getContext(), laneLayout, laneData);
if (getInstData().size())
layoutAttr = xegpu::LayoutAttr::get(storage.getContext(), instData);
+ if (getSgData().size())
+ layoutAttr = xegpu::LayoutAttr::get(
+ storage.getContext(),
+ DenseI32ArrayAttr::get(storage.getContext(), sgLayout),
+ DenseI32ArrayAttr::get(storage.getContext(), sgData),
+ /*inst_data =*/nullptr, /*lane_layout =*/nullptr,
+ /*lane_data =*/nullptr, orderAttr);
return LayoutInfo(layoutAttr);
}
@@ -487,6 +531,9 @@ bool LayoutInfoPropagation::hasParamsOfLayoutKind(
} else if (layoutKind == LayoutKind::Lane) {
return !(anchorLayout.getEffectiveLaneLayoutAsInt().empty() ||
anchorLayout.getEffectiveLaneDataAsInt().empty());
+ } else if (layoutKind == LayoutKind::Subgroup) {
+ return !(anchorLayout.getEffectiveSgLayoutAsInt().empty() ||
+ anchorLayout.getEffectiveSgDataAsInt().empty());
}
return false;
}
@@ -1311,6 +1358,8 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
layoutKind = LayoutKind::Lane;
} else if (this->layoutKind == "inst") {
layoutKind = LayoutKind::InstData;
+ } else if (this->layoutKind == "sg") {
+ layoutKind = LayoutKind::Subgroup;
} else {
getOperation()->emitError("Unsupported layout kind option: " +
this->layoutKind);
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir
new file mode 100644
index 0000000000000..5659e9995b22a
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir
@@ -0,0 +1,53 @@
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=sg" -split-input-file %s | FileCheck %s
+
+gpu.module @test {
+ // CHECK-LABEL: store_nd
+ // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
+ func.func @store_nd(%src: memref<256x128xf32>) {
+ // CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]] : memref<256x128xf32>
+ // CHECK-SAME: -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}>
+ // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}
+ // CHECK-SAME: : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
+ // CHECK-SAME: -> vector<256x128xf32>
+ // CHECK: xegpu.store_nd %[[LOAD]], %[[TDESC]] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}>
+ // CHECK-SAME: : vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
+ %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32> -> !xegpu.tensor_desc<256x128xf32>
+ %load = xegpu.load_nd %tdesc : !xegpu.tensor_desc<256x128xf32> -> vector<256x128xf32>
+ xegpu.store_nd %load, %tdesc {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}
+ : vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32>
+ return
+ }
+}
+
+// -----
+
+gpu.module @test {
+ // CHECK-LABEL: vector_transpose
+ // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
+ // CHECK-SAME: %[[ARG_1:.*]]: memref<128x256xf32>
+ func.func @vector_transpose(%src: memref<256x128xf32>, %src1: memref<128x256xf32>) {
+ // CHECK: %[[TDESC_LD:.*]] = xegpu.create_nd_tdesc %[[ARG_0]] : memref<256x128xf32> ->
+ // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [4, 8], sg_data = [64, 32], order = [0, 1]>>
+ // CHECK: %[[TDESC_ST:.*]] = xegpu.create_nd_tdesc %[[ARG_1]] : memref<128x256xf32> ->
+ // CHECK-SAME: !xegpu.tensor_desc<128x256xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>>
+
+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC_LD]][0, 0] <{layout = #xegpu.layout<sg_layout = [4, 8], sg_data = [64, 32], order = [0, 1]>}>
+ // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [4, 8], sg_data = [64, 32], order = [0, 1]>} :
+ // CHECK-SAME: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [4, 8], sg_data = [64, 32], order = [0, 1]>> -> vector<256x128xf32>
+
+ // CHECK: %[[TRANSPOSED:.*]] = vector.transpose %[[LOAD]], [1, 0]
+ // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>} : vector<256x128xf32> to vector<128x256xf32>
+
+ // CHECK: xegpu.store_nd %[[TRANSPOSED]], %[[TDESC_ST]][0, 0]
+ // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>}> : vector<128x256xf32>,
+ // CHECK-SAME: !xegpu.tensor_desc<128x256xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>>
+ %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32> -> !xegpu.tensor_desc<256x128xf32>
+ %tdesc1 = xegpu.create_nd_tdesc %src1 : memref<128x256xf32> -> !xegpu.tensor_desc<128x256xf32>
+ %load = xegpu.load_nd %tdesc[0, 0] : !xegpu.tensor_desc<256x128xf32> -> vector<256x128xf32>
+ %trans = vector.transpose %load, [1, 0] : vector<256x128xf32> to vector<128x256xf32>
+ xegpu.store_nd %trans, %tdesc1[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>}
+ : vector<128x256xf32>, !xegpu.tensor_desc<128x256xf32>
+ return
+ }
+}
>From 353c945276f683a2c3f63d64dd1373f8bbc5004f Mon Sep 17 00:00:00 2001
From: Artem Kroviakov <artem.kroviakov at intel.com>
Date: Tue, 16 Dec 2025 16:15:40 +0000
Subject: [PATCH 2/2] Rename subgroup prop option
---
mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td | 4 ++--
mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp | 2 +-
...pagate-layout-sg.mlir => propagate-layout-subgroup.mlir} | 6 +++---
3 files changed, 6 insertions(+), 6 deletions(-)
rename mlir/test/Dialect/XeGPU/{propagate-layout-sg.mlir => propagate-layout-subgroup.mlir} (92%)
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index c682e6fdad1df..3ff7805263f0e 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -51,7 +51,7 @@ def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
Propagate the `lane_layout` and `lane_data` fields of the layout attribute.
Default values are selected to align with hardware.
- - `sg`
+ - `subgroup`
Propagate the `sg_layout` and `sg_data` fields of the layout attribute.
Default values are selected to align with hardware.
}];
@@ -64,7 +64,7 @@ def XeGPUPropagateLayout : Pass<"xegpu-propagate-layout"> {
Option<
"layoutKind", "layout-kind", "std::string",
/*default=*/"\"lane\"",
- "Propagate `sg` / `inst` / `lane` level of xegpu layouts.">
+ "Propagate `subgroup` / `inst` / `lane` level of xegpu layouts.">
];
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index a3d057981024d..cbd91154ce0aa 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -1358,7 +1358,7 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
layoutKind = LayoutKind::Lane;
} else if (this->layoutKind == "inst") {
layoutKind = LayoutKind::InstData;
- } else if (this->layoutKind == "sg") {
+ } else if (this->layoutKind == "subgroup") {
layoutKind = LayoutKind::Subgroup;
} else {
getOperation()->emitError("Unsupported layout kind option: " +
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
similarity index 92%
rename from mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir
rename to mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 5659e9995b22a..c7dfc9fb7b1f1 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-sg.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=sg" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=subgroup" -split-input-file %s | FileCheck %s
gpu.module @test {
// CHECK-LABEL: store_nd
@@ -14,7 +14,7 @@ gpu.module @test {
// CHECK-SAME: : vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
%tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32> -> !xegpu.tensor_desc<256x128xf32>
%load = xegpu.load_nd %tdesc : !xegpu.tensor_desc<256x128xf32> -> vector<256x128xf32>
- xegpu.store_nd %load, %tdesc {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}
+ xegpu.store_nd %load, %tdesc <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>}>
: vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32>
return
}
@@ -46,7 +46,7 @@ gpu.module @test {
%tdesc1 = xegpu.create_nd_tdesc %src1 : memref<128x256xf32> -> !xegpu.tensor_desc<128x256xf32>
%load = xegpu.load_nd %tdesc[0, 0] : !xegpu.tensor_desc<256x128xf32> -> vector<256x128xf32>
%trans = vector.transpose %load, [1, 0] : vector<256x128xf32> to vector<128x256xf32>
- xegpu.store_nd %trans, %tdesc1[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>}
+ xegpu.store_nd %trans, %tdesc1[0, 0] <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], order = [1, 0]>}>
: vector<128x256xf32>, !xegpu.tensor_desc<128x256xf32>
return
}
More information about the Mlir-commits
mailing list