[Mlir-commits] [mlir] [mlir][XeGPU] Update utils for LayoutAttr and SliceAttr support (PR #154819)
Chao Chen
llvmlistbot at llvm.org
Wed Aug 27 09:57:35 PDT 2025
https://github.com/chencha3 updated https://github.com/llvm/llvm-project/pull/154819
>From ad5d0a88a4f065dc3720d977c8e3d125c5b768b8 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Thu, 21 Aug 2025 17:58:25 +0000
Subject: [PATCH 1/7] rename getLayoutAttr util
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 66 +++++++++++++++++++
.../mlir/Dialect/XeGPU/IR/XeGPUDialect.td | 2 +-
.../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 27 ++++----
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 25 ++++---
.../XeGPU/Transforms/XeGPUBlocking.cpp | 16 ++---
.../Transforms/XeGPUSubgroupDistribute.cpp | 5 +-
.../Transforms/XeGPUWgToSgDistribute.cpp | 26 ++++----
mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp | 30 ++++-----
8 files changed, 132 insertions(+), 65 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index b4d696444cc44..5b4b376157c00 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -185,6 +185,9 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
InterfaceMethod<"Check the availability of workgroup level layouts",
"bool",
"isForWorkgroup">,
+ InterfaceMethod<"Check the availability of subgroup level layouts",
+ "bool",
+ "isForSubgroup">,
InterfaceMethod<"Get the rank of attribute",
"int64_t",
"getRank">,
@@ -202,6 +205,15 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
InterfaceMethod<"Get the SgData field of the attribute as integer array",
"std::optional<SmallVector<int64_t>>",
"getSgDataAsInt">,
+ InterfaceMethod<"Get the InstData field of the attribute as integer array",
+ "std::optional<SmallVector<int64_t>>",
+ "getInstDataAsInt">,
+ InterfaceMethod<"Get the LaneLayout field of the attribute as integer array",
+ "std::optional<SmallVector<int64_t>>",
+ "getLaneLayoutAsInt">,
+ InterfaceMethod<"Get the LaneData field of the attribute as integer array",
+ "std::optional<SmallVector<int64_t>>",
+ "getLaneDataAsInt">,
InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData",
"xegpu::DistributeLayoutAttr",
"dropSgLayoutAndData">,
@@ -388,6 +400,24 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
return std::nullopt;
}
+ std::optional<SmallVector<int64_t>> getInstDataAsInt() const {
+ if (DenseI32ArrayAttr inst = getInstData())
+ return llvm::to_vector_of<int64_t>(inst.asArrayRef());
+ return std::nullopt;
+ }
+
+ std::optional<SmallVector<int64_t>> getLaneLayoutAsInt() const {
+ if (DenseI32ArrayAttr layout = getLaneLayout())
+ return llvm::to_vector_of<int64_t>(layout.asArrayRef());
+ return std::nullopt;
+ }
+
+ std::optional<SmallVector<int64_t>> getLaneDataAsInt() const {
+ if (DenseI32ArrayAttr data = getLaneData())
+ return llvm::to_vector_of<int64_t>(data.asArrayRef());
+ return std::nullopt;
+ }
+
/// Delinearizes a linear subgroup ID into its multidimensional indices
/// based on the effective subgroup layout.
FailureOr<SmallVector<Value>>
@@ -488,6 +518,42 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
return std::nullopt;
}
+ /// Returns the InstData of the attribute, computed by applying
+ /// the slice dimensions to the underlying LayoutAttr.
+ std::optional<SmallVector<int64_t>> getInstDataAsInt() const {
+ SliceAttr attr = flatten();
+ auto parent = dyn_cast<LayoutAttr>(attr.getParent());
+ if (auto inst = parent.getInstDataAsInt()) {
+ ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
+ return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*inst), dims);
+ }
+ return std::nullopt;
+ }
+
+ /// Returns the LaneLayout of the attribute, computed by applying
+ /// the slice dimensions to the underlying LayoutAttr.
+ std::optional<SmallVector<int64_t>> getLaneLayoutAsInt() const {
+ SliceAttr attr = flatten();
+ auto parent = dyn_cast<LayoutAttr>(attr.getParent());
+ if (auto layout = parent.getLaneLayoutAsInt()) {
+ ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
+ return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*layout), dims);
+ }
+ return std::nullopt;
+ }
+
+ /// Returns the LaneData of the attribute, computed by applying
+ /// the slice dimensions to the underlying LayoutAttr.
+ std::optional<SmallVector<int64_t>> getLaneDataAsInt() const {
+ SliceAttr attr = flatten();
+ auto parent = dyn_cast<LayoutAttr>(attr.getParent());
+ if (auto data = parent.getLaneDataAsInt()) {
+ ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
+ return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*data), dims);
+ }
+ return std::nullopt;
+ }
+
SliceAttr dropSgLayoutAndData() {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index 76d58e5ea2424..c173b93face98 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -40,7 +40,7 @@ def XeGPU_Dialect : Dialect {
let extraClassDeclaration = [{
/// Checks if the given shape can be evenly distributed based on the layout
/// and data factors provided by the LayoutAttr.
- static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::LayoutAttr attr);
+ static bool isEvenlyDistributable(llvm::ArrayRef<int64_t> shape, xegpu::DistributeLayoutAttr attr);
/// drops/slices the shape in the specified dims, and return the rest. e.g.,
/// for shape = [32, 64, 8], dims = [0, 2], it will return [64]
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index b2b2d3ab85231..010199083add9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -21,6 +21,7 @@ class ValueRange;
class TypeConverter;
namespace xegpu {
+class DistributeLayoutAttr;
class LayoutAttr;
class TensorDescType;
} // namespace xegpu
@@ -60,22 +61,22 @@ FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
LayoutAttr layout);
-/// Return the attribute name for the OpOperand to attach LayoutAttr
+/// Return the attribute name for the OpOperand to attach DistributeLayoutAttr
std::string getLayoutName(const OpOperand &operand);
-/// Return the attribute name for the OpResult to attach LayoutAttr
+/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
std::string getLayoutName(const OpResult result);
-/// Retrieves the LayoutAttr associated with a given Value. For TensorDescType
-/// values, the LayoutAttr is extracted from the TensorDescType itself. For
+/// Retrieves the DistributeLayoutAttr associated with a given Value. For TensorDescType
+/// values, the DistributeLayoutAttr is extracted from the TensorDescType itself. For
/// other values, it is obtained from the attributes of the defining operation.
-/// Returns nullptr if no LayoutAttr is found.
-LayoutAttr getLayoutAttr(const Value value);
+/// Returns nullptr if no DistributeLayoutAttr is found.
+DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
-/// Retrieves the LayoutAttr associated with a given OpOperand. It will
+/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It will
/// first check the operand_layout_{id} of the owner operation. If not found,
/// it will check the operand itself and its defining op.
-LayoutAttr getLayoutAttr(const OpOperand &opr);
+DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
/// Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
template <typename T,
@@ -83,23 +84,23 @@ template <typename T,
std::is_same_v<T, OpResult>>>
void removeLayoutAttr(const T &operandOrResult);
-/// Removes the LayoutAttr for each OpOperand and OpResult of the given
+/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given
/// operation if they exist. If the operation contains regions, it is also
/// applied recursively to the contained operations
void removeLayoutAttrs(Operation *op);
-/// Sets the LayoutAttr for a given OpOperand or OpResult by attaching
+/// Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching
/// it to the owner's dictionary attributes
template <typename T,
typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
std::is_same_v<T, OpResult>>>
-void setLayoutAttr(const T &operandOrResult, const LayoutAttr layout);
+void setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout);
-/// Set the LayoutAttr for each OpOperand and OpResult of the given operation.
+/// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
/// If the operation contains regions, it is also applied recursively to the
/// contained operations
void setLayoutAttrs(Operation *op,
- function_ref<LayoutAttr(Value)> getLayoutImpl);
+ function_ref<DistributeLayoutAttr(Value)> getLayoutImpl);
/// Extract a set of small vectors from a value with a given shape using
/// vector.extract_stride_slice
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index a2d708be0e937..2079848c878a3 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -91,7 +91,7 @@ genOffsetsComputingInsts(OpBuilder &builder, Location loc,
// Checks if the given shape can be evenly distributed based on the layout
// and data factors provided by the LayoutAttr.
bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
- xegpu::LayoutAttr attr) {
+ xegpu::DistributeLayoutAttr attr) {
assert(attr && "Layout attribute is missing.");
// Checks whether the given shape can be evenly distributed using the
@@ -104,52 +104,51 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
// smaller than `layout[i] * data[i]`, allowing multiple compute units to
// share the data.
auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
- DenseI32ArrayAttr layout, DenseI32ArrayAttr data,
+ std::optional<SmallVector<int64_t>> layout,
+ std::optional<SmallVector<int64_t>> data,
bool rr = true) -> optional<SmallVector<int64_t>> {
llvm::SmallVector<int64_t> newShape(shape);
if (layout) {
- auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef());
- if (vec.size() != shape.size())
+ if ((*layout).size() != shape.size())
return std::nullopt;
- auto ratio = computeShapeRatio(shape, vec);
+ auto ratio = computeShapeRatio(shape, *layout);
if (!ratio.has_value())
return std::nullopt;
newShape = ratio.value();
}
if (data) {
- auto vec = llvm::to_vector_of<int64_t>(data.asArrayRef());
- if (vec.size() != shape.size())
+ if ((*data).size() != shape.size())
return std::nullopt;
- auto ratio = computeShapeRatio(newShape, vec);
+ auto ratio = computeShapeRatio(newShape, *data);
if (!ratio.has_value() && rr)
- ratio = computeShapeRatio(vec, newShape);
+ ratio = computeShapeRatio(*data, newShape);
if (!ratio.has_value())
return std::nullopt;
// if data is not null, we always return it for next phase.
- newShape = vec;
+ newShape = *data;
}
return newShape;
};
// check the sgLayout and sgData
auto maybeSgShape =
- tryDistribute(shape, attr.getSgLayout(), attr.getSgData());
+ tryDistribute(shape, attr.getSgLayoutAsInt(), attr.getSgDataAsInt());
if (!maybeSgShape)
return false;
auto sgShape = maybeSgShape.value();
// check InstData, it neither have layout nor need round-robin
auto maybeInstShape =
- tryDistribute(sgShape, nullptr, attr.getInstData(), false);
+ tryDistribute(sgShape, std::nullopt, attr.getInstDataAsInt(), false);
if (!maybeInstShape)
return false;
auto instShape = maybeInstShape.value();
// check LaneLayout and LaneData
auto maybeLaneShape =
- tryDistribute(instShape, attr.getLaneLayout(), attr.getLaneData(), false);
+ tryDistribute(instShape, attr.getLaneLayoutAsInt(), attr.getLaneDataAsInt(), false);
return maybeLaneShape.has_value();
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index b3144e4c1e55d..c62597df1f895 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -140,10 +140,10 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
else
value = (Value)operandOrResult;
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(operandOrResult);
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(operandOrResult);
if (layout && layout.isForSubgroup()) {
- if (auto inst_data = layout.getInstData())
- return llvm::to_vector_of<int64_t>(inst_data.asArrayRef());
+ if (auto inst_data = layout.getInstDataAsInt())
+ return inst_data.value();
if (auto type = dyn_cast<ShapedType>(value.getType()))
return llvm::to_vector(type.getShape());
@@ -204,12 +204,12 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
// skip the op if any of its operands or results has workgroup level layouts
bool hasWgLayoutOperands =
llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(opr);
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(opr);
return layout && layout.isForWorkgroup();
});
bool hasWgLayoutResults =
llvm::any_of(op->getOpResults(), [](OpResult result) {
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(result);
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(result);
return layout && layout.isForWorkgroup();
});
if (hasWgLayoutOperands || hasWgLayoutResults) {
@@ -220,8 +220,8 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
auto isUnrollable = [](Value value, ArrayRef<int64_t> tileShape) {
Type valTy = value.getType();
if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(valTy)) {
- xegpu::LayoutAttr layout = tdescTy.getLayoutAttr();
- return layout && layout.getInstData();
+ xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr();
+ return layout && layout.getInstDataAsInt();
}
auto shapedType = dyn_cast<ShapedType>(valTy);
return shapedType && !llvm::equal(tileShape, shapedType.getShape());
@@ -247,7 +247,7 @@ void XeGPUBlockingPass::runOnOperation() {
// Preserve the LayoutAttr for each operand to the owner's DictionaryAttr.
// This ensures that the LayoutAttr remains accessible even if the defining
// operation is replaced.
- xegpu::setLayoutAttrs(op, [](Value v) { return xegpu::getLayoutAttr(v); });
+ xegpu::setLayoutAttrs(op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); });
auto getTileShapeAndCount = [](llvm::ArrayRef<int64_t> shape,
xegpu::LayoutAttr layout) {
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index 2088c3c7fc5ec..de9378bd7a6f6 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -841,7 +841,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
if (!isa<VectorType>(operand.get().getType()))
continue;
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(operand);
+ auto layout = dyn_cast<xegpu::LayoutAttr>(xegpu::getDistributeLayoutAttr(operand));
if (!layout) {
op->emitError("Could not find layout attribute for operand ")
<< operand.getOperandNumber() << " of operation " << op->getName();
@@ -882,7 +882,8 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
if (vecRank == 0)
return AffineMap::get(val.getContext());
// Get the layout of the vector type.
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(val);
+ // TODO: support more layout types
+ auto layout = dyn_cast<xegpu::LayoutAttr>(xegpu::getDistributeLayoutAttr(val));
// If no layout is specified, assume the inner most dimension is distributed
// for now.
if (!layout)
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 93b4efcd125ec..c60f9e361bf8e 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -406,7 +406,7 @@ struct WgToSgDpasOp : public OpConversionPattern<xegpu::DpasOp> {
if (resultTy.getRank() != 2)
return failure();
- auto originalLayout = xegpu::getLayoutAttr(op.getResult());
+ auto originalLayout = xegpu::getDistributeLayoutAttr(op.getResult());
if (!originalLayout)
return failure();
@@ -470,8 +470,8 @@ struct WgToSgVectorBroadcastOp
VectorType resultType = op.getResult().getType();
ArrayRef<int64_t> wgShape = resultType.getShape();
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op.getResult());
- if (!layout || !layout.getSgLayout())
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult());
+ if (!layout || !layout.isForWorkgroup())
return failure();
// TODO: Currently only supports cases where the source and result ranks
@@ -487,8 +487,8 @@ struct WgToSgVectorBroadcastOp
// Check if the output layout is distributable
SmallVector<int64_t> sgLayout;
- if (auto sgLayoutAttr = layout.getSgLayout())
- sgLayout = llvm::to_vector_of<int64_t>(sgLayoutAttr.asArrayRef());
+ if (auto maybeSgLayout = layout.getSgLayoutAsInt())
+ sgLayout = *maybeSgLayout;
else
return failure();
@@ -535,8 +535,8 @@ struct WgToSgElementwiseOp : public ConversionPattern {
ArrayRef<int64_t> wgShape = resultType.getShape();
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op->getResult(0));
- if (!layout || !layout.getSgLayout())
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0));
+ if (!layout || !layout.isForWorkgroup())
return failure();
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
@@ -737,8 +737,8 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
if (!vecAttr || !vecAttr.isSplat() || !vecType)
return failure();
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op.getResult());
- if (!layout || !layout.getSgLayout())
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult());
+ if (!layout || !layout.isForWorkgroup())
return failure();
ArrayRef<int64_t> wgShape = vecType.getShape();
@@ -928,7 +928,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
});
target.addDynamicallyLegalOp<xegpu::DpasOp>([=](xegpu::DpasOp op) -> bool {
- auto layout = xegpu::getLayoutAttr(op.getResult());
+ auto layout = xegpu::getDistributeLayoutAttr(op.getResult());
return isLegal(layout);
});
@@ -947,12 +947,12 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
auto vecType = dyn_cast<VectorType>(op.getType());
if (!vecType)
return true;
- return isLegal(xegpu::getLayoutAttr(op.getResult()));
+ return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
});
target.addDynamicallyLegalOp<vector::BroadcastOp>(
[=](vector::BroadcastOp op) -> bool {
- return isLegal(xegpu::getLayoutAttr(op.getResult()));
+ return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
});
target.addDynamicallyLegalOp<xegpu::ConvertLayoutOp>(
@@ -980,7 +980,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
}
}
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(op->getResult(0));
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0));
return isLegal(layout);
});
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
index 6835f64ad8ef7..5ae025ef34739 100644
--- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
+++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -114,7 +114,7 @@ std::string xegpu::getLayoutName(const OpResult result) {
return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
}
-xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) {
+xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
if (!value)
return nullptr;
@@ -132,11 +132,11 @@ xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) {
// for LoadNdOp, the layout is stored in the tensor descriptor
if (auto loadNd = dyn_cast<xegpu::LoadNdOp>(defOp))
- return getLayoutAttr(loadNd.getTensorDesc());
+ return getDistributeLayoutAttr(loadNd.getTensorDesc());
std::string layoutName = getLayoutName(result);
if (defOp->hasAttr(layoutName))
- return defOp->getAttrOfType<xegpu::LayoutAttr>(layoutName);
+ return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
}
if (auto arg = dyn_cast<BlockArgument>(value)) {
@@ -144,41 +144,41 @@ xegpu::LayoutAttr xegpu::getLayoutAttr(const Value value) {
if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
OpOperand *tiedInit = loop.getTiedLoopInit(arg);
if (tiedInit)
- return getLayoutAttr(tiedInit->get());
+ return getDistributeLayoutAttr(tiedInit->get());
}
}
return nullptr;
}
-xegpu::LayoutAttr xegpu::getLayoutAttr(const OpOperand &opr) {
+xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
Operation *op = opr.getOwner();
std::string layoutName = xegpu::getLayoutName(opr);
if (op->hasAttr(layoutName))
- return op->getAttrOfType<xegpu::LayoutAttr>(layoutName);
- return getLayoutAttr(opr.get());
+ return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
+ return getDistributeLayoutAttr(opr.get());
}
template <typename T, typename>
-void xegpu::setLayoutAttr(const T &operandOrResult, const LayoutAttr layout) {
+void xegpu::setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout) {
Operation *owner = operandOrResult.getOwner();
std::string name = xegpu::getLayoutName(operandOrResult);
- if (layout && !owner->hasAttrOfType<LayoutAttr>(name))
+ if (layout && !owner->hasAttrOfType<DistributeLayoutAttr>(name))
owner->setAttr(name, layout);
}
// Explicit instantiation for OpResult
template void
xegpu::setLayoutAttr<mlir::OpResult>(const mlir::OpResult &result,
- const mlir::xegpu::LayoutAttr layout);
+ const mlir::xegpu::DistributeLayoutAttr layout);
// Explicit instantiation for OpOperand
template void
xegpu::setLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &operand,
- const mlir::xegpu::LayoutAttr layout);
+ const mlir::xegpu::DistributeLayoutAttr layout);
void xegpu::setLayoutAttrs(Operation *op,
- function_ref<LayoutAttr(Value)> getLayoutImpl) {
+ function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
op->walk([&](Operation *nestOp) {
for (OpOperand &opr : nestOp->getOpOperands()) {
auto layout = getLayoutImpl(opr.get());
@@ -195,7 +195,7 @@ template <typename T, typename>
void xegpu::removeLayoutAttr(const T &operandOrResult) {
Operation *owner = operandOrResult.getOwner();
std::string name = xegpu::getLayoutName(operandOrResult);
- if (owner->hasAttrOfType<LayoutAttr>(name))
+ if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
owner->removeAttr(name);
}
@@ -306,7 +306,7 @@ void xegpu::doSCFStructuralTypeConversionWithTensorType(
if (!inputTy || !resultTy)
return WalkResult::skip();
- xegpu::LayoutAttr layout = xegpu::getLayoutAttr(input);
+ xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(input);
if (!layout)
return WalkResult::skip();
@@ -344,7 +344,7 @@ void xegpu::doSCFStructuralTypeConversionWithTensorType(
}
{ // perform the conversion from RankedTensorType to VectorType based on the
- // LayoutAttr
+ // DistributeLayoutAttr
// Handle the UnrealizedConversionCastOp introduced by the first step.
// For vector->RankedTensorType, it will simply forward the inputs.
>From 0e34f36690a34f071afd181649b8f86c90dde9b4 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Thu, 21 Aug 2025 18:10:49 +0000
Subject: [PATCH 2/7] refine
---
.../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 17 +++++++++++---
.../XeGPU/Transforms/XeGPUBlocking.cpp | 5 ++--
.../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 4 ++--
.../Transforms/XeGPUSubgroupDistribute.cpp | 7 +++---
.../Transforms/XeGPUWgToSgDistribute.cpp | 10 ++++----
mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp | 23 ++++++++++---------
6 files changed, 40 insertions(+), 26 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index 010199083add9..7089559d0c51b 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -73,11 +73,21 @@ std::string getLayoutName(const OpResult result);
/// Returns nullptr if no DistributeLayoutAttr is found.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
+template <typename AttrTy>
+AttrTy getDistributeLayoutAttrOfType(const Value value) {
+ return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(value));
+}
+
/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It will
/// first check the operand_layout_{id} of the owner operation. If not found,
/// it will check the operand itself and its defining op.
DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
+template <typename AttrTy>
+AttrTy getDistributeLayoutAttrOfType(const OpOperand &opr) {
+ return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(opr));
+}
+
/// Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
template <typename T,
typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
@@ -94,13 +104,14 @@ void removeLayoutAttrs(Operation *op);
template <typename T,
typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
std::is_same_v<T, OpResult>>>
-void setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout);
+void setDistributeLayoutAttr(const T &operandOrResult,
+ const DistributeLayoutAttr layout);
/// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
/// If the operation contains regions, it is also applied recursively to the
/// contained operations
-void setLayoutAttrs(Operation *op,
- function_ref<DistributeLayoutAttr(Value)> getLayoutImpl);
+void setDistributeLayoutAttrs(
+ Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl);
/// Extract a set of small vectors from a value with a given shape using
/// vector.extract_stride_slice
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index c62597df1f895..2e3e40ed2d457 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -247,7 +247,8 @@ void XeGPUBlockingPass::runOnOperation() {
// Preserve the LayoutAttr for each operand to the owner's DictionaryAttr.
// This ensures that the LayoutAttr remains accessible even if the defining
// operation is replaced.
- xegpu::setLayoutAttrs(op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); });
+ xegpu::setDistributeLayoutAttrs(
+ op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); });
auto getTileShapeAndCount = [](llvm::ArrayRef<int64_t> shape,
xegpu::LayoutAttr layout) {
@@ -377,7 +378,7 @@ void XeGPUBlockingPass::runOnOperation() {
if (auto layout = op->getAttrOfType<xegpu::LayoutAttr>(name)) {
op->removeAttr(name);
if (!isa<LoopLikeOpInterface>(op))
- xegpu::setLayoutAttr(result, layout.dropInstData());
+ xegpu::setDistributeLayoutAttr(result, layout.dropInstData());
}
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index bef88042fc663..5cb47b2accd68 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -718,7 +718,7 @@ static LogicalResult updateOp(mlir::OpBuilder &builder, mlir::Operation *op,
}
// If the result is a vector type, add a temporary layout attribute to the
// op.
- xegpu::setLayoutAttr(result, layout);
+ xegpu::setDistributeLayoutAttr(result, layout);
}
return success();
}
@@ -800,7 +800,7 @@ updateControlFlowOps(mlir::OpBuilder &builder,
// If the type is a vector type and this region argument is an OpResult,
// set the layout attribute on the OpResult.
if (auto result = dyn_cast<OpResult>(successorInput))
- xegpu::setLayoutAttr(result, successorOperandLayout);
+ xegpu::setDistributeLayoutAttr(result, successorOperandLayout);
}
}
return success();
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index de9378bd7a6f6..e48e2180197ec 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -841,14 +841,15 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
if (!isa<VectorType>(operand.get().getType()))
continue;
- auto layout = dyn_cast<xegpu::LayoutAttr>(xegpu::getDistributeLayoutAttr(operand));
+ auto layout =
+ xegpu::getDistributeLayoutAttrOfType<xegpu::LayoutAttr>(operand);
if (!layout) {
op->emitError("Could not find layout attribute for operand ")
<< operand.getOperandNumber() << " of operation " << op->getName();
signalPassFailure();
return;
}
- xegpu::setLayoutAttr(operand, layout);
+ xegpu::setDistributeLayoutAttr(operand, layout);
}
});
// Step 2: Move all operations of a GPU function inside
@@ -883,7 +884,7 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
return AffineMap::get(val.getContext());
// Get the layout of the vector type.
// TODO: support more layout types
- auto layout = dyn_cast<xegpu::LayoutAttr>(xegpu::getDistributeLayoutAttr(val));
+ auto layout = xegpu::getDistributeLayoutAttrOfType<xegpu::LayoutAttr>(val);
// If no layout is specified, assume the inner most dimension is distributed
// for now.
if (!layout)
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index c60f9e361bf8e..a8700ca73efc4 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -429,8 +429,8 @@ struct WgToSgDpasOp : public OpConversionPattern<xegpu::DpasOp> {
VectorType resTy = VectorType::get({aVecShape[0], bVecShape[1]},
resultTy.getElementType());
tmpC = xegpu::DpasOp::create(rewriter, loc, resTy, operands);
- xegpu::setLayoutAttr(cast<OpResult>(tmpC),
- originalLayout.dropSgLayoutAndData());
+ xegpu::setDistributeLayoutAttr(cast<OpResult>(tmpC),
+ originalLayout.dropSgLayoutAndData());
newDpasOps.push_back(tmpC);
}
@@ -508,8 +508,8 @@ struct WgToSgVectorBroadcastOp
for (auto operand : adaptor.getOperands().front()) {
auto newBroadcast = vector::BroadcastOp::create(rewriter, op.getLoc(),
newResultType, operand);
- xegpu::setLayoutAttr(newBroadcast->getResult(0),
- layout.dropSgLayoutAndData());
+ xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0),
+ layout.dropSgLayoutAndData());
newBroadcastOps.push_back(newBroadcast.getResult());
}
@@ -755,7 +755,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
auto cstOp =
arith::ConstantOp::create(rewriter, op.getLoc(), newType, sgAttr);
if (auto newLayout = layout.dropSgLayoutAndData())
- xegpu::setLayoutAttr(cstOp->getResult(0), newLayout);
+ xegpu::setDistributeLayoutAttr(cstOp->getResult(0), newLayout);
SmallVector<Value> newConsts(count, cstOp);
rewriter.replaceOpWithMultiple(op, {newConsts});
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
index 5ae025ef34739..1d4de68754c20 100644
--- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
+++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -160,7 +160,8 @@ xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const OpOperand &opr)
}
template <typename T, typename>
-void xegpu::setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout) {
+void xegpu::setDistributeLayoutAttr(const T &operandOrResult,
+ const DistributeLayoutAttr layout) {
Operation *owner = operandOrResult.getOwner();
std::string name = xegpu::getLayoutName(operandOrResult);
if (layout && !owner->hasAttrOfType<DistributeLayoutAttr>(name))
@@ -168,25 +169,25 @@ void xegpu::setLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr l
}
// Explicit instantiation for OpResult
-template void
-xegpu::setLayoutAttr<mlir::OpResult>(const mlir::OpResult &result,
- const mlir::xegpu::DistributeLayoutAttr layout);
+template void xegpu::setDistributeLayoutAttr<mlir::OpResult>(
+ const mlir::OpResult &result,
+ const mlir::xegpu::DistributeLayoutAttr layout);
// Explicit instantiation for OpOperand
-template void
-xegpu::setLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &operand,
- const mlir::xegpu::DistributeLayoutAttr layout);
+template void xegpu::setDistributeLayoutAttr<mlir::OpOperand>(
+ const mlir::OpOperand &operand,
+ const mlir::xegpu::DistributeLayoutAttr layout);
-void xegpu::setLayoutAttrs(Operation *op,
- function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
+void xegpu::setDistributeLayoutAttrs(
+ Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
op->walk([&](Operation *nestOp) {
for (OpOperand &opr : nestOp->getOpOperands()) {
auto layout = getLayoutImpl(opr.get());
- setLayoutAttr(opr, layout);
+ setDistributeLayoutAttr(opr, layout);
}
for (OpResult result : nestOp->getOpResults()) {
auto layout = getLayoutImpl(result);
- setLayoutAttr(result, layout);
+ setDistributeLayoutAttr(result, layout);
}
});
}
>From a84014ff42002dc5b036558c62e5387536e74019 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Thu, 21 Aug 2025 18:12:17 +0000
Subject: [PATCH 3/7] format
---
.../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 25 ++++++++++---------
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 4 +--
.../XeGPU/Transforms/XeGPUBlocking.cpp | 9 ++++---
.../Transforms/XeGPUWgToSgDistribute.cpp | 12 ++++++---
mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp | 6 +++--
5 files changed, 33 insertions(+), 23 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index 7089559d0c51b..82fd70571c022 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -67,10 +67,11 @@ std::string getLayoutName(const OpOperand &operand);
/// Return the attribute name for the OpResult to attach DistributeLayoutAttr
std::string getLayoutName(const OpResult result);
-/// Retrieves the DistributeLayoutAttr associated with a given Value. For TensorDescType
-/// values, the DistributeLayoutAttr is extracted from the TensorDescType itself. For
-/// other values, it is obtained from the attributes of the defining operation.
-/// Returns nullptr if no DistributeLayoutAttr is found.
+/// Retrieves the DistributeLayoutAttr associated with a given Value. For
+/// TensorDescType values, the DistributeLayoutAttr is extracted from the
+/// TensorDescType itself. For other values, it is obtained from the attributes
+/// of the defining operation. Returns nullptr if no DistributeLayoutAttr is
+/// found.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value);
template <typename AttrTy>
@@ -78,9 +79,9 @@ AttrTy getDistributeLayoutAttrOfType(const Value value) {
return dyn_cast_if_present<AttrTy>(getDistributeLayoutAttr(value));
}
-/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It will
-/// first check the operand_layout_{id} of the owner operation. If not found,
-/// it will check the operand itself and its defining op.
+/// Retrieves the DistributeLayoutAttr associated with a given OpOperand. It
+/// will first check the operand_layout_{id} of the owner operation. If not
+/// found, it will check the operand itself and its defining op.
DistributeLayoutAttr getDistributeLayoutAttr(const OpOperand &opr);
template <typename AttrTy>
@@ -94,8 +95,8 @@ template <typename T,
std::is_same_v<T, OpResult>>>
void removeLayoutAttr(const T &operandOrResult);
-/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given
-/// operation if they exist. If the operation contains regions, it is also
+/// Removes the DistributeLayoutAttr for each OpOperand and OpResult of the
+/// given operation if they exist. If the operation contains regions, it is also
/// applied recursively to the contained operations
void removeLayoutAttrs(Operation *op);
@@ -107,9 +108,9 @@ template <typename T,
void setDistributeLayoutAttr(const T &operandOrResult,
const DistributeLayoutAttr layout);
-/// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
-/// If the operation contains regions, it is also applied recursively to the
-/// contained operations
+/// Set the DistributeLayoutAttr for each OpOperand and OpResult of the given
+/// operation. If the operation contains regions, it is also applied recursively
+/// to the contained operations
void setDistributeLayoutAttrs(
Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl);
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 2079848c878a3..6de6049facfc6 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -147,8 +147,8 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
auto instShape = maybeInstShape.value();
// check LaneLayout and LaneData
- auto maybeLaneShape =
- tryDistribute(instShape, attr.getLaneLayoutAsInt(), attr.getLaneDataAsInt(), false);
+ auto maybeLaneShape = tryDistribute(instShape, attr.getLaneLayoutAsInt(),
+ attr.getLaneDataAsInt(), false);
return maybeLaneShape.has_value();
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 2e3e40ed2d457..45fed8e548a89 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -140,7 +140,8 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
else
value = (Value)operandOrResult;
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(operandOrResult);
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(operandOrResult);
if (layout && layout.isForSubgroup()) {
if (auto inst_data = layout.getInstDataAsInt())
return inst_data.value();
@@ -204,12 +205,14 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
// skip the op if any of its operands or results has workgroup level layouts
bool hasWgLayoutOperands =
llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(opr);
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(opr);
return layout && layout.isForWorkgroup();
});
bool hasWgLayoutResults =
llvm::any_of(op->getOpResults(), [](OpResult result) {
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(result);
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(result);
return layout && layout.isForWorkgroup();
});
if (hasWgLayoutOperands || hasWgLayoutResults) {
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index a8700ca73efc4..518c7817a516e 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -470,7 +470,8 @@ struct WgToSgVectorBroadcastOp
VectorType resultType = op.getResult().getType();
ArrayRef<int64_t> wgShape = resultType.getShape();
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult());
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(op.getResult());
if (!layout || !layout.isForWorkgroup())
return failure();
@@ -535,7 +536,8 @@ struct WgToSgElementwiseOp : public ConversionPattern {
ArrayRef<int64_t> wgShape = resultType.getShape();
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0));
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(op->getResult(0));
if (!layout || !layout.isForWorkgroup())
return failure();
@@ -737,7 +739,8 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
if (!vecAttr || !vecAttr.isSplat() || !vecType)
return failure();
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op.getResult());
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(op.getResult());
if (!layout || !layout.isForWorkgroup())
return failure();
@@ -980,7 +983,8 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
}
}
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(op->getResult(0));
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(op->getResult(0));
return isLegal(layout);
});
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
index 1d4de68754c20..cac1ffe4d3bc3 100644
--- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
+++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -151,7 +151,8 @@ xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
return nullptr;
}
-xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
+xegpu::DistributeLayoutAttr
+xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
Operation *op = opr.getOwner();
std::string layoutName = xegpu::getLayoutName(opr);
if (op->hasAttr(layoutName))
@@ -307,7 +308,8 @@ void xegpu::doSCFStructuralTypeConversionWithTensorType(
if (!inputTy || !resultTy)
return WalkResult::skip();
- xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(input);
+ xegpu::DistributeLayoutAttr layout =
+ xegpu::getDistributeLayoutAttr(input);
if (!layout)
return WalkResult::skip();
>From f3af2c307597bf13a04579b3235b45af7ea10392 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Thu, 21 Aug 2025 18:59:45 +0000
Subject: [PATCH 4/7] update convert_layout
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 3 +++
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 4 ++--
mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp | 6 +++---
mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 5 +++--
4 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 5b4b376157c00..77e3c257f234e 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -217,6 +217,9 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData",
"xegpu::DistributeLayoutAttr",
"dropSgLayoutAndData">,
+ InterfaceMethod<"Derive a new layout by dropping InstData",
+ "xegpu::DistributeLayoutAttr",
+ "dropInstData">,
InterfaceMethod<[{Delinearizes a linear subgroup ID into its multidimensional
indices based on the effective subgroup layout.}],
"FailureOr<SmallVector<Value>>",
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index ab471a1f33ef9..2f6671c5e37cc 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1162,8 +1162,8 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
the IR is lowered to WI level because that is the end result of all distributions.
}];
let arguments = (ins XeGPU_VectorType: $source,
- XeGPU_LayoutAttr: $input_layout,
- XeGPU_LayoutAttr: $target_layout);
+ DistributeLayoutAttr: $input_layout,
+ DistributeLayoutAttr: $target_layout);
let results = (outs XeGPU_VectorType: $result);
let assemblyFormat = [{
$source prop-dict attr-dict `:` type($source)
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 45fed8e548a89..80e9d4d25b06c 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -84,9 +84,9 @@ struct ConvertLayoutOpPattern
using OpRewritePattern::OpRewritePattern;
LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op,
PatternRewriter &rewriter) const override {
- xegpu::LayoutAttr input_layout = op.getInputLayoutAttr();
- xegpu::LayoutAttr target_layout = op.getTargetLayoutAttr();
- if (!input_layout.getInstData() || !target_layout.getInstData())
+ xegpu::DistributeLayoutAttr input_layout = op.getInputLayoutAttr();
+ xegpu::DistributeLayoutAttr target_layout = op.getTargetLayoutAttr();
+ if (!input_layout.getInstDataAsInt() || !target_layout.getInstDataAsInt())
return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp.");
input_layout = input_layout.dropInstData();
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 518c7817a516e..4fb962908793f 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -613,8 +613,9 @@ struct WgToSgConvertLayoutOp
LogicalResult
matchAndRewrite(xegpu::ConvertLayoutOp op, OneToNOpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
- xegpu::LayoutAttr input = op.getInputLayout();
- xegpu::LayoutAttr target = op.getTargetLayout();
+ // TODO: WgToSg distribution of ConvertLayoutOp currently supports only
+ // LayoutAttr; SliceAttr inputs/targets are rejected below.
+ auto input = dyn_cast<xegpu::LayoutAttr>(op.getInputLayout());
+ auto target = dyn_cast<xegpu::LayoutAttr>(op.getTargetLayout());
if (!input || !target || !input.isForWorkgroup() ||
!target.isForWorkgroup())
>From 35c64895111db5d7019a64078fbe719dce317b95 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 22 Aug 2025 14:45:35 +0000
Subject: [PATCH 5/7] fix compilation error in clang
---
mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index 82fd70571c022..bad734dbfd9f0 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -9,6 +9,7 @@
#ifndef MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
#define MLIR_DIALECT_XEGPU_UTILS_XEGPUUTILS_H_
+#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/OpDefinition.h"
namespace mlir {
>From c49546af9d1aa1eca506224110f60bea9a5581c2 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 27 Aug 2025 16:50:33 +0000
Subject: [PATCH 6/7] address comments
---
.../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 85 ++++++++++---------
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 62 +++++++-------
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +-
.../XeGPU/Transforms/XeGPUBlocking.cpp | 17 ++--
.../Transforms/XeGPUWgToSgDistribute.cpp | 29 +++----
.../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 2 +-
6 files changed, 100 insertions(+), 97 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 77e3c257f234e..db34e35f27510 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -184,7 +184,7 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
let methods = [
InterfaceMethod<"Check the availability of workgroup level layouts",
"bool",
- "isForWorkgroup">,
+ "hasSgLayout">,
InterfaceMethod<"Check the availability of subgroup level layouts",
"bool",
"isForSubgroup">,
@@ -200,19 +200,19 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
return 0;
}], [{}]>,
InterfaceMethod<"Get the SgLayout field of the attribute as integer array",
- "std::optional<SmallVector<int64_t>>",
+ "SmallVector<int64_t>",
"getSgLayoutAsInt">,
InterfaceMethod<"Get the SgData field of the attribute as integer array",
- "std::optional<SmallVector<int64_t>>",
+ "SmallVector<int64_t>",
"getSgDataAsInt">,
InterfaceMethod<"Get the InstData field of the attribute as integer array",
- "std::optional<SmallVector<int64_t>>",
+ "SmallVector<int64_t>",
"getInstDataAsInt">,
InterfaceMethod<"Get the LaneLayout field of the attribute as integer array",
- "std::optional<SmallVector<int64_t>>",
+ "SmallVector<int64_t>",
"getLaneLayoutAsInt">,
InterfaceMethod<"Get the LaneData field of the attribute as integer array",
- "std::optional<SmallVector<int64_t>>",
+ "SmallVector<int64_t>",
"getLaneDataAsInt">,
InterfaceMethod<"Derive a new layout by dropping sgLayout and sgData",
"xegpu::DistributeLayoutAttr",
@@ -357,12 +357,12 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
];
let extraClassDeclaration = [{
- bool isForWorkgroup() {
+ bool hasSgLayout() {
return getSgLayout() != nullptr;
}
bool isForSubgroup() {
- return !isForWorkgroup();
+ return !hasSgLayout();
}
int64_t getRank() {
@@ -391,34 +391,34 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
getLaneLayout(), getLaneData(), getOrder());
}
- std::optional<SmallVector<int64_t>> getSgLayoutAsInt() const {
+ SmallVector<int64_t> getSgLayoutAsInt() const {
if (DenseI32ArrayAttr layout = getSgLayout())
return llvm::to_vector_of<int64_t>(layout.asArrayRef());
- return std::nullopt;
+ return {};
}
- std::optional<SmallVector<int64_t>> getSgDataAsInt() const {
+ SmallVector<int64_t> getSgDataAsInt() const {
if (DenseI32ArrayAttr data = getSgData())
return llvm::to_vector_of<int64_t>(data.asArrayRef());
- return std::nullopt;
+ return {};
}
- std::optional<SmallVector<int64_t>> getInstDataAsInt() const {
+ SmallVector<int64_t> getInstDataAsInt() const {
if (DenseI32ArrayAttr inst = getInstData())
return llvm::to_vector_of<int64_t>(inst.asArrayRef());
- return std::nullopt;
+ return {};
}
- std::optional<SmallVector<int64_t>> getLaneLayoutAsInt() const {
+ SmallVector<int64_t> getLaneLayoutAsInt() const {
if (DenseI32ArrayAttr layout = getLaneLayout())
return llvm::to_vector_of<int64_t>(layout.asArrayRef());
- return std::nullopt;
+ return {};
}
- std::optional<SmallVector<int64_t>> getLaneDataAsInt() const {
+ SmallVector<int64_t> getLaneDataAsInt() const {
if (DenseI32ArrayAttr data = getLaneData())
return llvm::to_vector_of<int64_t>(data.asArrayRef());
- return std::nullopt;
+ return {};
}
/// Delinearizes a linear subgroup ID into its multidimensional indices
@@ -485,10 +485,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
return parent.getOrder();
}
- bool isForWorkgroup() const {
+ bool hasSgLayout() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- return parent.isForWorkgroup();
+ return parent.hasSgLayout();
}
bool isForSubgroup() const {
@@ -499,62 +499,67 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
/// Returns the SgLayout of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- std::optional<SmallVector<int64_t>> getSgLayoutAsInt() const {
+ SmallVector<int64_t> getSgLayoutAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- if (auto layout = parent.getSgLayoutAsInt()) {
+ auto layout = parent.getSgLayoutAsInt();
+ if (layout.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
- return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*layout), dims);
+ return XeGPUDialect::slice(ArrayRef<int64_t>(layout), dims);
}
- return std::nullopt;
+ return {};
}
/// Returns the SgData of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- std::optional<SmallVector<int64_t>> getSgDataAsInt() const {
+ SmallVector<int64_t> getSgDataAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- if (auto data = parent.getSgDataAsInt()) {
+ auto data = parent.getSgDataAsInt();
+ if (data.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
- return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*data), dims);
+ return XeGPUDialect::slice(ArrayRef<int64_t>(data), dims);
}
- return std::nullopt;
+ return {};
}
/// Returns the InstData of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- std::optional<SmallVector<int64_t>> getInstDataAsInt() const {
+ SmallVector<int64_t> getInstDataAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- if (auto inst = parent.getInstDataAsInt()) {
+ auto inst = parent.getInstDataAsInt();
+ if (inst.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
- return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*inst), dims);
+ return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(inst), dims);
}
- return std::nullopt;
+ return {};
}
/// Returns the LaneLayout of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- std::optional<SmallVector<int64_t>> getLaneLayoutAsInt() const {
+ SmallVector<int64_t> getLaneLayoutAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- if (auto layout = parent.getLaneLayoutAsInt()) {
+ auto layout = parent.getLaneLayoutAsInt();
+ if (layout.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
- return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*layout), dims);
+ return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(layout), dims);
}
- return std::nullopt;
+ return {};
}
/// Returns the LaneData of the attribute, computed by applying
/// the slice dimensions to the underlying LayoutAttr.
- std::optional<SmallVector<int64_t>> getLaneDataAsInt() const {
+ SmallVector<int64_t> getLaneDataAsInt() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- if (auto data = parent.getLaneDataAsInt()) {
+ auto data = parent.getLaneDataAsInt();
+ if (data.size()) {
ArrayRef<int64_t> dims = attr.getDims().asArrayRef();
- return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(*data), dims);
+ return XeGPUDialect::slice(llvm::ArrayRef<int64_t>(data), dims);
}
- return std::nullopt;
+ return {};
}
SliceAttr dropSgLayoutAndData() {
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 6de6049facfc6..b460f6dfd2769 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -104,30 +104,30 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
// smaller than `layout[i] * data[i]`, allowing multiple compute units to
// share the data.
auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
- std::optional<SmallVector<int64_t>> layout,
- std::optional<SmallVector<int64_t>> data,
+ SmallVector<int64_t> layout,
+ SmallVector<int64_t> data,
bool rr = true) -> optional<SmallVector<int64_t>> {
llvm::SmallVector<int64_t> newShape(shape);
- if (layout) {
- if ((*layout).size() != shape.size())
+ if (layout.size()) {
+ if (layout.size() != shape.size())
return std::nullopt;
- auto ratio = computeShapeRatio(shape, *layout);
+ auto ratio = computeShapeRatio(shape, layout);
if (!ratio.has_value())
return std::nullopt;
newShape = ratio.value();
}
- if (data) {
- if ((*data).size() != shape.size())
+ if (data.size()) {
+ if (data.size() != shape.size())
return std::nullopt;
- auto ratio = computeShapeRatio(newShape, *data);
+ auto ratio = computeShapeRatio(newShape, data);
if (!ratio.has_value() && rr)
- ratio = computeShapeRatio(*data, newShape);
+ ratio = computeShapeRatio(data, newShape);
if (!ratio.has_value())
return std::nullopt;
// if data is not null, we always return it for next phase.
- newShape = *data;
+ newShape = data;
}
return newShape;
};
@@ -141,7 +141,7 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
// check InstData; it neither has a layout nor needs round-robin
auto maybeInstShape =
- tryDistribute(sgShape, std::nullopt, attr.getInstDataAsInt(), false);
+ tryDistribute(sgShape, {}, attr.getInstDataAsInt(), false);
if (!maybeInstShape)
return false;
auto instShape = maybeInstShape.value();
@@ -270,7 +270,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
Value linearId) {
// delinearizeSubgroupId is only available for
// workgroup-level layout attribute
- if (!isForWorkgroup())
+ if (!hasSgLayout())
return failure();
// TODO: handle order attribute
@@ -282,7 +282,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
if (!hasDefaultOrder())
return mlir::emitError(loc, "order attribute is currently not supported.");
- auto dims = llvm::map_to_vector(*getSgLayoutAsInt(), [&](int64_t d) -> Value {
+ auto dims = llvm::map_to_vector(getSgLayoutAsInt(), [&](int64_t d) -> Value {
return builder.createOrFold<arith::ConstantIndexOp>(loc, d);
});
@@ -295,17 +295,17 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
FailureOr<SmallVector<SmallVector<Value>>>
LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
ArrayRef<int64_t> shape) {
- if (!isForWorkgroup())
+ if (!hasSgLayout())
return failure();
- SmallVector<int64_t> sgLayout = getSgLayoutAsInt().value();
- SmallVector<int64_t> sgShape;
- if (auto maybeSgShape = getSgDataAsInt())
- sgShape = maybeSgShape.value();
- else if (auto derivedShape = computeShapeRatio(shape, sgLayout))
- sgShape = derivedShape.value();
- else
- return failure();
+ SmallVector<int64_t> sgLayout = getSgLayoutAsInt();
+ SmallVector<int64_t> sgShape = getSgDataAsInt();
+ if (sgShape.empty()) {
+ if (auto derivedShape = computeShapeRatio(shape, sgLayout))
+ sgShape = derivedShape.value();
+ else
+ return failure();
+ }
// delinearize Ids
auto maybeIds = delinearizeSubgroupId(builder, loc, linearId);
@@ -382,17 +382,17 @@ FailureOr<SmallVector<SmallVector<Value>>>
SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
ArrayRef<int64_t> shape) {
assert(getRank() == static_cast<int64_t>(shape.size()) && "invalid shape.");
- if (!isForWorkgroup())
+ if (!hasSgLayout())
return failure();
- SmallVector<int64_t> sgLayout = getSgLayoutAsInt().value();
- SmallVector<int64_t> sgShape;
- if (auto maybeSgShape = getSgDataAsInt())
- sgShape = maybeSgShape.value();
- else if (auto derivedShape = computeShapeRatio(shape, sgLayout))
- sgShape = derivedShape.value();
- else
- return failure();
+ SmallVector<int64_t> sgLayout = getSgLayoutAsInt();
+ SmallVector<int64_t> sgShape = getSgDataAsInt();
+ if (sgShape.empty()) {
+ if (auto derivedShape = computeShapeRatio(shape, sgLayout))
+ sgShape = derivedShape.value();
+ else
+ return failure();
+ }
// delinearize Ids
auto maybeIds = delinearizeSubgroupId(builder, loc, linearId);
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index c8d180b973f05..f799205069a18 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -938,7 +938,7 @@ LogicalResult ConvertLayoutOp::verify() {
// both input and target layouts should be WgLayout or SgLayout at the same
// time.
- if ((!srcLayout.isForWorkgroup() || !resLayout.isForWorkgroup()) &&
+ if ((!srcLayout.hasSgLayout() || !resLayout.hasSgLayout()) &&
(!srcLayout.isForSubgroup() || !resLayout.isForSubgroup()))
return emitOpError("expected input layout and target layout be WgLayout or "
"SgLayout at the same time.");
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 80e9d4d25b06c..c0be589708df0 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -86,7 +86,8 @@ struct ConvertLayoutOpPattern
PatternRewriter &rewriter) const override {
xegpu::DistributeLayoutAttr input_layout = op.getInputLayoutAttr();
xegpu::DistributeLayoutAttr target_layout = op.getTargetLayoutAttr();
- if (!input_layout.getInstDataAsInt() || !target_layout.getInstDataAsInt())
+ if (input_layout.getInstDataAsInt().empty() ||
+ target_layout.getInstDataAsInt().empty())
return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp.");
input_layout = input_layout.dropInstData();
@@ -143,8 +144,8 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(operandOrResult);
if (layout && layout.isForSubgroup()) {
- if (auto inst_data = layout.getInstDataAsInt())
- return inst_data.value();
+ if (!layout.getInstDataAsInt().empty())
+ return layout.getInstDataAsInt();
if (auto type = dyn_cast<ShapedType>(value.getType()))
return llvm::to_vector(type.getShape());
@@ -207,13 +208,13 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(opr);
- return layout && layout.isForWorkgroup();
+ return layout && layout.hasSgLayout();
});
bool hasWgLayoutResults =
llvm::any_of(op->getOpResults(), [](OpResult result) {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(result);
- return layout && layout.isForWorkgroup();
+ return layout && layout.hasSgLayout();
});
if (hasWgLayoutOperands || hasWgLayoutResults) {
LDBG() << "skip unrolling for op with workgroup level layout: " << *op;
@@ -224,7 +225,7 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
Type valTy = value.getType();
if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(valTy)) {
xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr();
- return layout && layout.getInstDataAsInt();
+ return layout && !layout.getInstDataAsInt().empty();
}
auto shapedType = dyn_cast<ShapedType>(valTy);
return shapedType && !llvm::equal(tileShape, shapedType.getShape());
@@ -276,7 +277,7 @@ void XeGPUBlockingPass::runOnOperation() {
auto layout =
llvm::dyn_cast_if_present<xegpu::LayoutAttr>(type.getEncoding());
- if (layout && layout.isForWorkgroup())
+ if (layout && layout.hasSgLayout())
return failure();
int count;
@@ -293,7 +294,7 @@ void XeGPUBlockingPass::runOnOperation() {
ArrayRef<int64_t> shape = type.getShape();
xegpu::LayoutAttr layout = type.getLayoutAttr();
- if (layout && layout.isForWorkgroup())
+ if (layout && layout.hasSgLayout())
return failure();
int count;
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 4fb962908793f..a84a0b1415072 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -51,10 +51,10 @@ getSgShapeAndCount(ArrayRef<int64_t> shape,
xegpu::DistributeLayoutAttr layout) {
int count = 1;
SmallVector<int64_t> sgShape(shape);
- if (layout && layout.isForWorkgroup()) {
- SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt().value();
- if (auto maybeSgData = layout.getSgDataAsInt())
- sgShape = *maybeSgData;
+ if (layout && layout.hasSgLayout()) {
+ SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
+ if (!layout.getSgDataAsInt().empty())
+ sgShape = layout.getSgDataAsInt();
else if (auto maybeDerivedSgData = computeShapeRatio(shape, sgLayout))
sgShape = *maybeDerivedSgData;
SmallVector<int64_t> distUnit = computeElementwiseMul(sgLayout, sgShape);
@@ -88,7 +88,7 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op,
// not applicable to ops without workgroup layout attributes
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
- if (!layout || !layout.isForWorkgroup())
+ if (!layout || !layout.hasSgLayout())
return failure();
Value sgId = rewriter.create<gpu::SubgroupIdOp>(loc, /*upper_bound=*/nullptr);
@@ -226,7 +226,7 @@ struct WgToSgCreateNdOpNoOffset
MLIRContext *ctx = op.getContext();
xegpu::TensorDescType tdescTy = op.getType();
auto layout = dyn_cast<xegpu::LayoutAttr>(tdescTy.getLayout());
- if (!layout || !layout.isForWorkgroup())
+ if (!layout || !layout.hasSgLayout())
return failure();
Type elemTy = tdescTy.getElementType();
@@ -472,7 +472,7 @@ struct WgToSgVectorBroadcastOp
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(op.getResult());
- if (!layout || !layout.isForWorkgroup())
+ if (!layout || !layout.hasSgLayout())
return failure();
// TODO: Currently only supports cases where the source and result ranks
@@ -487,10 +487,8 @@ struct WgToSgVectorBroadcastOp
VectorType::get(sgShape, resultType.getElementType());
// Check if the output layout is distributable
- SmallVector<int64_t> sgLayout;
- if (auto maybeSgLayout = layout.getSgLayoutAsInt())
- sgLayout = *maybeSgLayout;
- else
+ SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
+ if (sgLayout.empty())
return failure();
if (!xegpu::XeGPUDialect::isEvenlyDistributable(wgShape, layout))
@@ -538,7 +536,7 @@ struct WgToSgElementwiseOp : public ConversionPattern {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(op->getResult(0));
- if (!layout || !layout.isForWorkgroup())
+ if (!layout || !layout.hasSgLayout())
return failure();
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
@@ -617,8 +615,7 @@ struct WgToSgConvertLayoutOp
auto input = dyn_cast<xegpu::LayoutAttr>(op.getInputLayout());
auto target = dyn_cast<xegpu::LayoutAttr>(op.getTargetLayout());
- if (!input || !target || !input.isForWorkgroup() ||
- !target.isForWorkgroup())
+ if (!input || !target || !input.hasSgLayout() || !target.hasSgLayout())
return rewriter.notifyMatchFailure(
op, "Input and target layouts must have subgroup layout");
@@ -742,7 +739,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(op.getResult());
- if (!layout || !layout.isForWorkgroup())
+ if (!layout || !layout.hasSgLayout())
return failure();
ArrayRef<int64_t> wgShape = vecType.getShape();
@@ -920,7 +917,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
};
auto isLegal = [&](xegpu::DistributeLayoutAttr layout) -> bool {
- return !layout || !layout.isForWorkgroup();
+ return !layout || !layout.hasSgLayout();
};
target.addDynamicallyLegalOp<xegpu::CreateNdDescOp, xegpu::LoadNdOp,
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index 200323c7a4e51..f5d24a12f86bd 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -239,7 +239,7 @@ struct TestXeGPULayoutInterface
ConversionTarget target(*ctx);
auto isLegal = [&](xegpu::SliceAttr layout) -> bool {
- return !layout || !layout.isForWorkgroup();
+ return !layout || !layout.hasSgLayout();
};
target.addDynamicallyLegalOp<vector::StepOp>(
>From a723f2115973159e0ddd0333520c7bce87ca208d Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 27 Aug 2025 16:57:19 +0000
Subject: [PATCH 7/7] roll back isForWorkgroup
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td | 10 +++++-----
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 6 +++---
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +-
.../Dialect/XeGPU/Transforms/XeGPUBlocking.cpp | 8 ++++----
.../XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 17 +++++++++--------
.../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 2 +-
6 files changed, 23 insertions(+), 22 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index db34e35f27510..cfe3e800484ce 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -184,7 +184,7 @@ def DistributeLayoutAttr: AttrInterface<"DistributeLayoutAttr"> {
let methods = [
InterfaceMethod<"Check the availability of workgroup level layouts",
"bool",
- "hasSgLayout">,
+ "isForWorkgroup">,
InterfaceMethod<"Check the availability of subgroup level layouts",
"bool",
"isForSubgroup">,
@@ -357,12 +357,12 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout", [DistributeLayoutAttr]> {
];
let extraClassDeclaration = [{
- bool hasSgLayout() {
+ bool isForWorkgroup() {
return getSgLayout() != nullptr;
}
bool isForSubgroup() {
- return !hasSgLayout();
+ return !isForWorkgroup();
}
int64_t getRank() {
@@ -485,10 +485,10 @@ def XeGPU_SliceAttr : XeGPUAttr<"Slice", "slice", [DistributeLayoutAttr]> {
return parent.getOrder();
}
- bool hasSgLayout() const {
+ bool isForWorkgroup() const {
SliceAttr attr = flatten();
auto parent = dyn_cast<LayoutAttr>(attr.getParent());
- return parent.hasSgLayout();
+ return parent.isForWorkgroup();
}
bool isForSubgroup() const {
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index b460f6dfd2769..7f3be7f91c56b 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -270,7 +270,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
Value linearId) {
// delinearizeSubgroupId is only available for
// workgroup-level layout attribute
- if (!hasSgLayout())
+ if (!isForWorkgroup())
return failure();
// TODO: handle order attribute
@@ -295,7 +295,7 @@ LayoutAttr::delinearizeSubgroupId(OpBuilder &builder, Location loc,
FailureOr<SmallVector<SmallVector<Value>>>
LayoutAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
ArrayRef<int64_t> shape) {
- if (!hasSgLayout())
+ if (!isForWorkgroup())
return failure();
SmallVector<int64_t> sgLayout = getSgLayoutAsInt();
@@ -382,7 +382,7 @@ FailureOr<SmallVector<SmallVector<Value>>>
SliceAttr::getOffsets(OpBuilder &builder, Location loc, Value linearId,
ArrayRef<int64_t> shape) {
assert(getRank() == static_cast<int64_t>(shape.size()) && "invalid shape.");
- if (!hasSgLayout())
+ if (!isForWorkgroup())
return failure();
SmallVector<int64_t> sgLayout = getSgLayoutAsInt();
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index f799205069a18..c8d180b973f05 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -938,7 +938,7 @@ LogicalResult ConvertLayoutOp::verify() {
// both input and target layouts should be WgLayout or SgLayout at the same
// time.
- if ((!srcLayout.hasSgLayout() || !resLayout.hasSgLayout()) &&
+ if ((!srcLayout.isForWorkgroup() || !resLayout.isForWorkgroup()) &&
(!srcLayout.isForSubgroup() || !resLayout.isForSubgroup()))
return emitOpError("expected input layout and target layout be WgLayout or "
"SgLayout at the same time.");
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index c0be589708df0..9ee002ede7838 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -208,13 +208,13 @@ bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(opr);
- return layout && layout.hasSgLayout();
+ return layout && layout.isForWorkgroup();
});
bool hasWgLayoutResults =
llvm::any_of(op->getOpResults(), [](OpResult result) {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(result);
- return layout && layout.hasSgLayout();
+ return layout && layout.isForWorkgroup();
});
if (hasWgLayoutOperands || hasWgLayoutResults) {
LDBG() << "skip unrolling for op with workgroup level layout: " << *op;
@@ -277,7 +277,7 @@ void XeGPUBlockingPass::runOnOperation() {
auto layout =
llvm::dyn_cast_if_present<xegpu::LayoutAttr>(type.getEncoding());
- if (layout && layout.hasSgLayout())
+ if (layout && layout.isForWorkgroup())
return failure();
int count;
@@ -294,7 +294,7 @@ void XeGPUBlockingPass::runOnOperation() {
ArrayRef<int64_t> shape = type.getShape();
xegpu::LayoutAttr layout = type.getLayoutAttr();
- if (layout && layout.hasSgLayout())
+ if (layout && layout.isForWorkgroup())
return failure();
int count;
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index a84a0b1415072..0b7fe81facfce 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -51,7 +51,7 @@ getSgShapeAndCount(ArrayRef<int64_t> shape,
xegpu::DistributeLayoutAttr layout) {
int count = 1;
SmallVector<int64_t> sgShape(shape);
- if (layout && layout.hasSgLayout()) {
+ if (layout && layout.isForWorkgroup()) {
SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
if (!layout.getSgDataAsInt().empty())
sgShape = layout.getSgDataAsInt();
@@ -88,7 +88,7 @@ genOffsetsList(ConversionPatternRewriter &rewriter, OpType op,
// not applicable to ops without workgroup layout attributes
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
- if (!layout || !layout.hasSgLayout())
+ if (!layout || !layout.isForWorkgroup())
return failure();
Value sgId = rewriter.create<gpu::SubgroupIdOp>(loc, /*upper_bound=*/nullptr);
@@ -226,7 +226,7 @@ struct WgToSgCreateNdOpNoOffset
MLIRContext *ctx = op.getContext();
xegpu::TensorDescType tdescTy = op.getType();
auto layout = dyn_cast<xegpu::LayoutAttr>(tdescTy.getLayout());
- if (!layout || !layout.hasSgLayout())
+ if (!layout || !layout.isForWorkgroup())
return failure();
Type elemTy = tdescTy.getElementType();
@@ -472,7 +472,7 @@ struct WgToSgVectorBroadcastOp
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(op.getResult());
- if (!layout || !layout.hasSgLayout())
+ if (!layout || !layout.isForWorkgroup())
return failure();
// TODO: Currently only supports cases where the source and result ranks
@@ -536,7 +536,7 @@ struct WgToSgElementwiseOp : public ConversionPattern {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(op->getResult(0));
- if (!layout || !layout.hasSgLayout())
+ if (!layout || !layout.isForWorkgroup())
return failure();
SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
@@ -615,7 +615,8 @@ struct WgToSgConvertLayoutOp
auto input = dyn_cast<xegpu::LayoutAttr>(op.getInputLayout());
auto target = dyn_cast<xegpu::LayoutAttr>(op.getTargetLayout());
- if (!input || !target || !input.hasSgLayout() || !target.hasSgLayout())
+ if (!input || !target || !input.isForWorkgroup() ||
+ !target.isForWorkgroup())
return rewriter.notifyMatchFailure(
op, "Input and target layouts must have subgroup layout");
@@ -739,7 +740,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
xegpu::DistributeLayoutAttr layout =
xegpu::getDistributeLayoutAttr(op.getResult());
- if (!layout || !layout.hasSgLayout())
+ if (!layout || !layout.isForWorkgroup())
return failure();
ArrayRef<int64_t> wgShape = vecType.getShape();
@@ -917,7 +918,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
};
auto isLegal = [&](xegpu::DistributeLayoutAttr layout) -> bool {
- return !layout || !layout.hasSgLayout();
+ return !layout || !layout.isForWorkgroup();
};
target.addDynamicallyLegalOp<xegpu::CreateNdDescOp, xegpu::LoadNdOp,
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index f5d24a12f86bd..200323c7a4e51 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -239,7 +239,7 @@ struct TestXeGPULayoutInterface
ConversionTarget target(*ctx);
auto isLegal = [&](xegpu::SliceAttr layout) -> bool {
- return !layout || !layout.hasSgLayout();
+ return !layout || !layout.isForWorkgroup();
};
target.addDynamicallyLegalOp<vector::StepOp>(
More information about the Mlir-commits
mailing list