[Mlir-commits] [mlir] f49a262 - [MLIR][XeGPU] TensorDesc Type support generic DistributeLayout instead of Layout (#190401)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Apr 8 14:20:02 PDT 2026
Author: Jianhui Li
Date: 2026-04-08T14:19:57-07:00
New Revision: f49a262fa70cfe4fb83741cbd6a6e30886c560e9
URL: https://github.com/llvm/llvm-project/commit/f49a262fa70cfe4fb83741cbd6a6e30886c560e9
DIFF: https://github.com/llvm/llvm-project/commit/f49a262fa70cfe4fb83741cbd6a6e30886c560e9.diff
LOG: [MLIR][XeGPU] TensorDesc Type support generic DistributeLayout instead of Layout (#190401)
This PR allows TensorDesc to support slice layout, not just plain
layout.
Added:
Modified:
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp
mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 7e142b20c0894..b13f5a9f2c9d9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -82,7 +82,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
static-dim-list ::= decimal-literal `x` decimal-literal
attr-list = (, encoding-attr)? (, layout-attr)?
enconding-attr = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)?
- layout-attr = (, layout `<`sg_layout = value, sg_data = value, inst_data = value, lane_layout = value, lane_data = value, order = value`>`)?
+ layout-attr = DistributeLayoutAttr
```
Examples:
@@ -158,8 +158,8 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
return llvm::dyn_cast_if_present<T>(getEncoding());
}
- LayoutAttr getLayoutAttr() const {
- return llvm::dyn_cast_if_present<LayoutAttr>(getLayout());
+ DistributeLayoutAttr getLayoutAttr() const {
+ return llvm::dyn_cast_if_present<DistributeLayoutAttr>(getLayout());
}
xegpu::MemorySpace getMemorySpace() const {
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index 0aa2cd45088f3..1b594f17e15ec 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -219,10 +219,11 @@ void setTemporaryLayout(const T &operandOrResult,
/// Helper function to check if the layout is packed. Layout is packed if it is
/// 2D and lane_data[0] != 1 (data packed from col dimension).
/// TODO: Move to target info.
-bool requirePacked(const LayoutAttr layout);
+bool requirePacked(const DistributeLayoutAttr layout);
/// Helper function to check if the layout requires a transpose effect.
-bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch);
+bool requireTranspose(const DistributeLayoutAttr layout,
+ const uArch::uArch *uArch);
// Check if dst shape is an expansion of src shape by inserting unit dimensions.
bool matchUnitDimExpansion(ArrayRef<int64_t> src, ArrayRef<int64_t> dst,
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 950371e17255f..64c56b5adf5d7 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -1318,7 +1318,7 @@ mlir::Type TensorDescType::parse(AsmParser &parser) {
mlir::Attribute attr;
ParseResult res = parser.parseAttribute(attr);
if (mlir::succeeded(res)) {
- if (mlir::isa<LayoutAttr>(attr)) {
+ if (mlir::isa<DistributeLayoutAttr>(attr)) {
layout = attr;
continue;
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 1ee0bc6ad9507..ef6a494b76638 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -270,12 +270,11 @@ void XeGPUBlockingPass::runOnOperation() {
}
auto getTileShapeAndCount = [](llvm::ArrayRef<int64_t> shape,
- xegpu::LayoutAttr layout) {
+ xegpu::DistributeLayoutAttr layout) {
int count = 1;
SmallVector<int64_t> tileShape(shape);
- if (layout && layout.getInstData()) {
- DenseI32ArrayAttr instData = layout.getInstData();
- tileShape = llvm::to_vector_of<int64_t>(instData.asArrayRef());
+ if (layout && !layout.getEffectiveInstDataAsInt().empty()) {
+ tileShape = layout.getEffectiveInstDataAsInt();
count = computeProduct(shape) / computeProduct(tileShape);
}
return std::make_pair(tileShape, count);
@@ -308,7 +307,7 @@ void XeGPUBlockingPass::runOnOperation() {
Type elemTy = type.getElementType();
ArrayRef<int64_t> shape = type.getShape();
- xegpu::LayoutAttr layout = type.getLayoutAttr();
+ xegpu::DistributeLayoutAttr layout = type.getLayoutAttr();
if (layout && layout.isForWorkgroup())
return failure();
@@ -348,9 +347,9 @@ void XeGPUBlockingPass::runOnOperation() {
if (chunkSize > 1) {
int64_t blockedChunkSize = chunkSize;
- auto instData = tdescTy.getLayoutAttr().getInstData();
+ auto instData = tdescTy.getLayoutAttr().getEffectiveInstDataAsInt();
if (!instData.empty())
- blockedChunkSize = instData.asArrayRef().back();
+ blockedChunkSize = instData.back();
// To create a new attribute with a different chunk_size:
auto newEncoding = xegpu::ScatterTensorDescAttr::get(
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp
index 0c7977bb241df..3496756e8a6d3 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp
@@ -145,10 +145,17 @@ static xegpu::TensorDescType tryOptimize(xegpu::TensorDescType tdescType,
return tdescType;
SmallVector<int64_t> supportedShape = {supportedHeight, supportedWidth};
+ auto ctx = tdescType.getContext();
+ auto origLayout = tdescType.getLayoutAttr();
+ auto laneLayoutI64 = origLayout.getEffectiveLaneLayoutAsInt();
+ SmallVector<int32_t> laneLayoutI32(laneLayoutI64.begin(),
+ laneLayoutI64.end());
+
xegpu::LayoutAttr newLayout = xegpu::LayoutAttr::get(
- tdescType.getContext(), tdescType.getLayoutAttr().getLaneLayout(),
- DenseI32ArrayAttr::get(tdescType.getContext(), {1, 1}),
- tdescType.getLayoutAttr().getOrder());
+ ctx, /*lane_layout=*/DenseI32ArrayAttr::get(ctx, laneLayoutI32),
+ /*lane_data=*/DenseI32ArrayAttr::get(ctx, {1, 1}),
+ /*order=*/origLayout.getOrder());
+
// Array length can not be larger than 1 for transpose case.
return xegpu::TensorDescType::get(supportedShape, newElemTy, arrayLen,
tdescType.getBoundaryCheck(),
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index ecdf253d68182..d8ce24ddd5cb0 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -256,7 +256,7 @@ struct CreateNdDescDistribution final : public gpu::WarpDistributionPattern {
auto descOp = operand->get().getDefiningOp<xegpu::CreateNdDescOp>();
unsigned operandIdx = operand->getOperandNumber();
- xegpu::LayoutAttr layout = descOp.getType().getLayoutAttr();
+ xegpu::DistributeLayoutAttr layout = descOp.getType().getLayoutAttr();
if (!layout)
return rewriter.notifyMatchFailure(
descOp, "the tensor descriptor lacks layout attribute");
@@ -342,7 +342,7 @@ struct StoreNdDistribution final : public gpu::WarpDistributionPattern {
SmallVector<Type> offsetTypes = llvm::map_to_vector(
offsetsAsValues, [](Value v) { return v.getType(); });
xegpu::TensorDescType tensorDescTy = storeOp.getTensorDescType();
- xegpu::LayoutAttr layout = tensorDescTy.getLayoutAttr();
+ xegpu::DistributeLayoutAttr layout = tensorDescTy.getLayoutAttr();
if (!layout)
return rewriter.notifyMatchFailure(
storeOp, "the source tensor descriptor lacks layout attribute");
@@ -474,7 +474,7 @@ struct LoadNdDistribution final : public gpu::WarpDistributionPattern {
offsetsAsValues, [](Value v) { return v.getType(); });
xegpu::TensorDescType tensorDescTy = loadOp.getTensorDescType();
- xegpu::LayoutAttr layout = tensorDescTy.getLayoutAttr();
+ xegpu::DistributeLayoutAttr layout = tensorDescTy.getLayoutAttr();
if (!layout)
return rewriter.notifyMatchFailure(
loadOp, "the source tensor descriptor lacks layout attribute");
@@ -709,7 +709,8 @@ struct PrefetchNdDistribution final : public gpu::WarpDistributionPattern {
SmallVector<Type> offsetTypes = llvm::map_to_vector(
offsetsAsValues, [](Value v) { return v.getType(); });
- xegpu::LayoutAttr layout = prefetchOp.getTensorDescType().getLayoutAttr();
+ xegpu::DistributeLayoutAttr layout =
+ prefetchOp.getTensorDescType().getLayoutAttr();
if (!layout)
return rewriter.notifyMatchFailure(
prefetchOp, "the source tensor descriptor lacks layout attribute");
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index a5b1df0f93f57..a095c19d66c15 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -1644,7 +1644,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
converter.addConversion(
[&](xegpu::TensorDescType type,
SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
- xegpu::LayoutAttr layout = type.getLayoutAttr();
+ xegpu::DistributeLayoutAttr layout = type.getLayoutAttr();
// Only convert WG-level tensor descs. SG-level or layout-less types
// are already legal and should pass through unchanged.
if (!layout || !layout.isForWorkgroup())
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
index 243581b4ce522..f0508a30621f2 100644
--- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
+++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -879,7 +879,7 @@ template int
xegpu::getLargestDivisor<unsigned>(unsigned dim, ArrayRef<unsigned> candidates,
ArrayRef<unsigned> candidateMultiples);
-bool xegpu::requirePacked(const xegpu::LayoutAttr layout) {
+bool xegpu::requirePacked(const xegpu::DistributeLayoutAttr layout) {
if (!layout)
return false;
auto laneData = layout.getEffectiveLaneDataAsInt();
@@ -888,7 +888,7 @@ bool xegpu::requirePacked(const xegpu::LayoutAttr layout) {
return laneData[0] != 1;
}
-bool xegpu::requireTranspose(const xegpu::LayoutAttr layout,
+bool xegpu::requireTranspose(const xegpu::DistributeLayoutAttr layout,
const xegpu::uArch::uArch *uArch) {
// Return false for unsupported targets.
// TODO: Add more support or move to target info.
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index 9ca424374335f..61b8046bd04e5 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -257,7 +257,7 @@ gpu.module @test_kernel {
// -----
#l = #xegpu.layout<inst_data = [16, 16]>
-#r = #xegpu.layout<inst_data = [16]>
+#r = #xegpu.slice<#xegpu.layout<inst_data = [16, 16]>, dims = [0]>
gpu.module @test_kernel {
gpu.func @reduce_dim_0(%a: memref<16x512xf32>, %b: memref<512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%acc = arith.constant {layout_result_0 = #r} dense<0.0> : vector<64xf32>
@@ -277,7 +277,7 @@ gpu.module @test_kernel {
// -----
#l = #xegpu.layout<inst_data = [16, 16]>
-#r = #xegpu.layout<inst_data = [16]>
+#r = #xegpu.slice<#xegpu.layout<inst_data = [16, 16]>, dims = [1]>
gpu.module @test_kernel {
gpu.func @reduce_dim_1(%a: memref<512x32xf32>, %b: memref<512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%c1 = arith.constant 1 : index
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index 0d10ab7c74da6..4760016bdcea4 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -106,10 +106,9 @@ struct TestXeGPUUnrollingPatterns
}
if (auto layout = tdescTy.getLayoutAttr()) {
- auto inst_data = layout.getInstData();
- if (inst_data && layout.isForSubgroup())
- return SmallVector<int64_t>(inst_data.asArrayRef().begin(),
- inst_data.asArrayRef().end());
+ auto inst_data = layout.getEffectiveInstDataAsInt();
+ if (!inst_data.empty() && layout.isForSubgroup())
+ return SmallVector<int64_t>(inst_data.begin(), inst_data.end());
}
}
@@ -138,9 +137,9 @@ struct TestXeGPUUnrollingPatterns
if (chunkSize > 1) {
int64_t blockedChunkSize = chunkSize;
- auto instData = layout.getInstData();
+ auto instData = layout.getEffectiveInstDataAsInt();
if (!instData.empty())
- blockedChunkSize = instData.asArrayRef().back();
+ blockedChunkSize = instData.back();
// To create a new attribute with a different chunk_size:
auto newEncoding = xegpu::ScatterTensorDescAttr::get(
@@ -150,7 +149,7 @@ struct TestXeGPUUnrollingPatterns
}
}
if (layout) {
- if (layout.getLaneLayout() == nullptr)
+ if (layout.getEffectiveLaneLayoutAsInt().empty())
layout = xegpu::LayoutAttr();
else
layout = layout.dropInstData();
More information about the Mlir-commits
mailing list