[Mlir-commits] [mlir] [mlir][xegpu] Add definitions of MatrixDescType and related ops. (PR #153273)
Chao Chen
llvmlistbot at llvm.org
Wed Aug 13 17:06:19 PDT 2025
https://github.com/chencha3 updated https://github.com/llvm/llvm-project/pull/153273
From cce8abaa92703dea562536c02fee3a8fd00ef9e6 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Fri, 8 Aug 2025 15:57:16 +0000
Subject: [PATCH 01/10] init
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 96 +++++++++++--------
.../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 23 +++++
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 56 +++++++++++
3 files changed, 134 insertions(+), 41 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 75b16a87e03c6..3b074a35e9cbd 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -29,7 +29,7 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
void printProperties(::mlir::MLIRContext *ctx,
::mlir::OpAsmPrinter &p, const Properties &prop,
::mlir::ArrayRef<::llvm::StringRef> elidedProps) {
-
+
DictionaryAttr propAttr = dyn_cast_if_present<mlir::DictionaryAttr>(getPropertiesAsAttr(ctx, prop));
// filter out the elidedProps from propAttr, and get the resultAttr
@@ -43,7 +43,7 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
}
if (!filteredAttrs.empty()) {
- p << "<" << DictionaryAttr::get(ctx, filteredAttrs) << ">";
+ p << "<" << DictionaryAttr::get(ctx, filteredAttrs) << ">";
}
}
@@ -189,11 +189,11 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
ArrayRef<int64_t> getStaticOffsets(){
auto attr = getConstOffsetsAttr();
- if (attr)
+ if (attr)
return attr;
int64_t rank = getMixedSizes().size();
-
+
setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, 0));
attr = getConstOffsetsAttr();
@@ -233,7 +233,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
auto attr = getConstStridesAttr();
if (attr)
return attr;
-
+
if (llvm::isa<IntegerType>(getSourceType()))
return emptyStrides;
@@ -314,15 +314,15 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
}];
let assemblyFormat = [{
- $TensorDesc ``
- custom<OptionalDynamicIndexList>($offsets, $const_offsets)
+ $TensorDesc ``
+ custom<OptionalDynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `:` qualified(type($TensorDesc))
}];
let builders = [
- OpBuilder<(ins "Value": $TensorDesc,
- "xegpu::CachePolicyAttr": $l1_hint,
- "xegpu::CachePolicyAttr": $l2_hint,
+ OpBuilder<(ins "Value": $TensorDesc,
+ "xegpu::CachePolicyAttr": $l1_hint,
+ "xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];
@@ -370,7 +370,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
Variadic<Index>: $offsets,
- OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
+ OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<UnitAttr>: $packed,
OptionalAttr<DenseI64ArrayAttr>: $transpose,
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
@@ -390,16 +390,16 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
}];
let assemblyFormat = [{
- $TensorDesc ``
- custom<OptionalDynamicIndexList>($offsets, $const_offsets)
+ $TensorDesc ``
+ custom<OptionalDynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)
}];
let builders = [
- OpBuilder<(ins "Type": $value, "Value": $TensorDesc,
+ OpBuilder<(ins "Type": $value, "Value": $TensorDesc,
"UnitAttr": $packed, "DenseI64ArrayAttr": $transpose,
- "xegpu::CachePolicyAttr": $l1_hint,
- "xegpu::CachePolicyAttr": $l2_hint,
+ "xegpu::CachePolicyAttr": $l1_hint,
+ "xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];
@@ -442,7 +442,7 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
let arguments = (ins XeGPU_ValueType: $value,
XeGPU_TensorDesc: $TensorDesc,
Variadic<Index>: $offsets,
- OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
+ OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
@@ -458,16 +458,16 @@ def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
}];
let assemblyFormat = [{
- $value `,`
- $TensorDesc ``
- custom<OptionalDynamicIndexList>($offsets, $const_offsets)
+ $value `,`
+ $TensorDesc ``
+ custom<OptionalDynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `:` type($value) `,` qualified(type($TensorDesc))
}];
let builders = [
- OpBuilder<(ins "Value": $value, "Value": $TensorDesc,
- "xegpu::CachePolicyAttr": $l1_hint,
- "xegpu::CachePolicyAttr": $l2_hint,
+ OpBuilder<(ins "Value": $value, "Value": $TensorDesc,
+ "xegpu::CachePolicyAttr": $l1_hint,
+ "xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];
@@ -635,12 +635,12 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
l3_hint = #xegpu.cache_hint<cached>}
: !xegpu.tensor_desc<16xf16>
```
-
+
Example 2:
A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc.
It combines "create scattered TensorTdesc" and "prefetch with scattered TensorTdesc".
The source operand could be a raw pointer (uint64_t).
- Please refer to create_tdesc for the restriction of memref.
+ Please refer to create_tdesc for the restriction of memref.
```mlir
%a = memref.alloc() : memref<1024xf32>
%0 = arith.constant dense<[0, 16, 32, 64]> : vector<4xindex>
@@ -676,16 +676,16 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
}];
let assemblyFormat = [{
- $source
+ $source
(`[` $offsets^ `]`)?
prop-dict
- attr-dict `:` type(operands)
+ attr-dict `:` type(operands)
}];
-
+
let builders = [
OpBuilder<(ins "Value": $source,
- "xegpu::CachePolicyAttr": $l1_hint,
- "xegpu::CachePolicyAttr": $l2_hint,
+ "xegpu::CachePolicyAttr": $l1_hint,
+ "xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];
@@ -723,7 +723,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
: !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>,
vector<16xi1> -> vector<16x8xf32>
```
-
+
Example 3 (SIMT mode):
```mlir
%2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>,
@@ -732,12 +732,12 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
: !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>
vector<16xi1> -> vector<8xf32>
```
-
+
Example 4:
A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc.
It combines "create scattered TensorTdesc" and "load with scattered TensorTdesc".
The source operand could be a raw pointer (uint64_t). Please refer to create_tdesc
- for the restriction of memref.
+ for the restriction of memref.
```mlir
%a = memref.alloc() : memref<1024xf32>
%offsets = vector.step : vector<16xindex>
@@ -794,14 +794,14 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>]> {
let assemblyFormat = [{
$source
(`[` $offsets^ `]`)? `,`
- $mask prop-dict
+ $mask prop-dict
attr-dict `:` type(operands) `->` type($value)
}];
let builders = [
OpBuilder<(ins "Type": $value, "Value": $source, "Value": $mask,
- "xegpu::CachePolicyAttr": $l1_hint,
- "xegpu::CachePolicyAttr": $l2_hint,
+ "xegpu::CachePolicyAttr": $l1_hint,
+ "xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];
@@ -848,7 +848,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc.
It combines "create scattered TensorTdesc" and "store with scattered TensorTdesc".
The dest operand could be a raw pointer (uint64_t).
- Please refer to create_tdesc for the restriction of memref.
+ Please refer to create_tdesc for the restriction of memref.
```mlir
%a = memref.alloc() : memref<1024xf32>
%val = arith.constant dense<0.0> : vector<16xf32>
@@ -901,15 +901,15 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>]> {
$value `,`
$dest
(`[` $offsets^ `]`)? `,`
- $mask
- prop-dict
+ $mask
+ prop-dict
attr-dict `:` type(operands)
}];
let builders = [
OpBuilder<(ins "Value": $value, "Value": $dest, "Value": $mask,
- "xegpu::CachePolicyAttr": $l1_hint,
- "xegpu::CachePolicyAttr": $l2_hint,
+ "xegpu::CachePolicyAttr": $l1_hint,
+ "xegpu::CachePolicyAttr": $l2_hint,
"xegpu::CachePolicyAttr": $l3_hint)>
];
@@ -1146,4 +1146,18 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
let hasCanonicalizer = 1;
}
+def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc"> {
+ let summary = "Create a matrix descriptor.";
+ let description = [{
+ Matrices are treated as 2D units.
+ In case the ROI rank is >2, the two fastest changing dimensions
+ represent a 2D unit and other dimensions specify the multiple
+ of these units that are stacked vertically.
+ Results:
+ - `matrix_desc` : a descriptor for SLM allocation.
+ }];
+ let results = (outs XeGPU_MatrixDesc:$matrix_desc);
+ let assemblyFormat = "attr-dict `:` type($matrix_desc)";
+}
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index b268cabb5d266..6ac126a84d39c 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -201,4 +201,27 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> {
}];
}
+def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> {
+ let summary = "MatrixDesc describing the data in SLM";
+ let description = [{
+    MatrixDesc describes the data stored in SLM. Unless specified via
+    the optional layout attribute, the data is stored in a contiguous
+ SLM region in row-major order by default.
+ }];
+ let parameters = (ins ArrayRefParameter<"int64_t">: $shape,
+ "mlir::Type": $elementType,
+ OptionalParameter<"mlir::Attribute">: $layout);
+
+ let extraClassDeclaration = [{
+ // using mlir::ShapedType::Trait<MatrixDescType>::getElementTypeBitWidth;
+ // using mlir::ShapedType::Trait<MatrixDescType>::getElementTypeBitWidth;
+ // using mlir::ShapedType::Trait<MatrixDescType>::getRank;
+ // using mlir::ShapedType::Trait<MatrixDescType>::getNumElements;
+ // using mlir::ShapedType::Trait<MatrixDescType>::isDynamicDim;
+ // using mlir::ShapedType::Trait<MatrixDescType>::hasStaticShape;
+ }];
+
+ let hasCustomAssemblyFormat = true;
+}
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 3c0ca114a62d4..50eb90dbc1df9 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -394,6 +394,62 @@ LogicalResult TensorDescType::verify(
return success();
}
+//===----------------------------------------------------------------------===//
+// XeGPU_MatrixDescType
+//===----------------------------------------------------------------------===//
+mlir::Type MatrixDescType::parse(::mlir::AsmParser &parser) {
+ llvm::SmallVector<int64_t> shape;
+ mlir::Type elementType;
+ mlir::FailureOr<mlir::Attribute> layout;
+
+ // Parse literal '<'
+ if (parser.parseLess())
+ return {};
+
+ auto shapeLoc = parser.getCurrentLocation();
+ if (mlir::failed(parser.parseDimensionList(shape, false, true))) {
+ parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
+ return {};
+ }
+
+ auto elemTypeLoc = parser.getCurrentLocation();
+ if (mlir::failed(parser.parseType(elementType))) {
+ parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
+ return {};
+ }
+
+ // parse optional attributes
+ if (mlir::succeeded(parser.parseOptionalComma())) {
+ mlir::Attribute attr;
+ ParseResult res = parser.parseAttribute(attr);
+ if (mlir::failed(res))
+ return {};
+ layout = attr;
+ }
+
+ // Parse literal '>'
+ if (parser.parseGreater())
+ return {};
+
+ MLIRContext *ctxt = parser.getContext();
+ return MatrixDescType::getChecked(
+ [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape,
+ elementType, layout.value_or(mlir::Attribute()));
+}
+
+void MatrixDescType::print(::mlir::AsmPrinter &printer) const {
+ printer << "<";
+
+ printer.printDimensionList(getShape());
+ printer << 'x';
+ printer << getElementType();
+
+ if (auto layout = getLayout())
+ printer << ", " << layout;
+
+ printer << ">";
+}
+
} // namespace xegpu
} // namespace mlir
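
For reference, a minimal sketch of the textual forms the custom parser and printer above accept. The plain and strided forms below mirror the types used in the tests added later in this series; the function name is hypothetical and the snippet assumes it sits inside a gpu.module.

```mlir
// Hypothetical signature; both forms exercise MatrixDescType's custom syntax.
gpu.func @matrix_desc_forms(%plain: !xegpu.matrix_desc<16x64xf16>,
                            %strided: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) {
  gpu.return
}
```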
From 76ccc39d6f3c599015d0d6d853cc20a4853fcb7f Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Mon, 11 Aug 2025 18:48:38 +0000
Subject: [PATCH 02/10] sync
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 87 ++++++++++++++++++-
.../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 16 ++--
2 files changed, 92 insertions(+), 11 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 3b074a35e9cbd..59c1a432dce66 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1146,18 +1146,101 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
let hasCanonicalizer = 1;
}
-def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc"> {
+def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure]> {
let summary = "Create a matrix descriptor.";
let description = [{
Matrices are treated as 2D units.
In case the ROI rank is >2, the two fastest changing dimensions
represent a 2D unit and other dimensions specify the multiple
of these units that are stacked vertically.
+ Arguments:
+ - `source` : a base address of SLM allocation.
Results:
- `matrix_desc` : a descriptor for SLM allocation.
}];
+ let arguments = (ins XeGPU_BaseAddrType:$source);
let results = (outs XeGPU_MatrixDesc:$matrix_desc);
- let assemblyFormat = "attr-dict `:` type($matrix_desc)";
+ let assemblyFormat = "$source prop-dict attr-dict `:` type($source) `->` type($matrix_desc)";
}
+def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> {
+ let arguments = (ins XeGPU_MatrixDesc:$matrix_desc,
+ Variadic<Index>: $offsets,
+ DenseI64ArrayAttr: $const_offsets,
+ OptionalAttr<XeGPU_LayoutAttr>:$layout
+ );
+ let results = (outs XeGPU_ValueType:$res);
+ let assemblyFormat = [{
+ $matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets)
+ prop-dict attr-dict `:` functional-type(operands, results)
+ }];
+ let summary = "Load matrix from SLM.";
+ let description = [{
+ This operation loads a matrix from the SLM using the matrix descriptor.
+ There are additional parameters and attributes that support loading, but they must only
+ be specified for a work-item level operation.
+
+ General rules:
+ 1. Non-WI-level code must not specify optional attributes.
+ 2. If the load uses `vector` semantics, all of the vector attributes must be specified.
+ 3. If the load uses `array` semantics, all of the array attributes must be specified.
+
+ Arguments:
+ - `matrix_desc` : a matrix descriptor (SLM allocation + matrix type).
+ - `offsets` : Coordinates of the matrix to load.
+ Results:
+ - `res` : loaded matrix elements.
+ }];
+
+ let builders = [
+ // OpBuilder<(ins "Type":$res, "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutAttr": $layout)>,
+ ];
+ let extraClassDeclaration = [{
+ SmallVector<OpFoldResult> getMixedOffsets() {
+ return getMixedValues(getConstOffsets(), getOffsets(), getContext());
+ }
+ }];
+ // let hasVerifier = 1;
+}
+
+def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix"> {
+ let arguments = (ins
+ XeGPU_MatrixDesc:$matrix_desc,
+ XeGPU_ValueType:$data,
+ Variadic<Index>: $offsets,
+ DenseI64ArrayAttr: $const_offsets,
+ OptionalAttr<XeGPU_LayoutAttr>:$layout
+ );
+ let assemblyFormat = [{
+ $matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets) `,` $data
+ prop-dict attr-dict `:` type(operands)
+ }];
+ let summary = "Store matrix from SLM.";
+ let description = [{
+ This operation stores workitem's `data` fragment of the matrix to the SLM (`matrix_desc`).
+    There are additional parameters and attributes that support storing, but they must only
+ be specified for a work-item level operation.
+
+ General rules:
+ 1. Non-WI-level code must not specify optional attributes.
+ 2. If the store uses `vector` semantics, all of the vector attributes must be specified.
+
+ Arguments:
+ - `matrix_desc` : a matrix descriptor.
+ - `data` : data to be stored to the matrix.
+ - `offsets` : Coordinates of the matrix where the data will be stored.
+ }];
+ let builders = [
+ // OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "Value" : $data, "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutAttr": $layout)>,
+ ];
+ let extraClassDeclaration = [{
+ SmallVector<OpFoldResult> getMixedOffsets() {
+ Builder b(getContext());
+ return getMixedValues(getConstOffsets(), getOffsets(), b);
+ }
+ }];
+ // let hasVerifier = 1;
+}
+
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 6ac126a84d39c..f578fc8bc0735 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -204,21 +204,19 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> {
def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> {
let summary = "MatrixDesc describing the data in SLM";
let description = [{
-    MatrixDesc describes the data stored in SLM. Unless specified via
-    the optional layout attribute, the data is stored in a contiguous
- SLM region in row-major order by default.
+    MatrixDesc describes an SLM region. Unless specified via the optional layout attribute,
+ the data is stored contiguously in the region in row-major order by default.
}];
let parameters = (ins ArrayRefParameter<"int64_t">: $shape,
"mlir::Type": $elementType,
OptionalParameter<"mlir::Attribute">: $layout);
let extraClassDeclaration = [{
- // using mlir::ShapedType::Trait<MatrixDescType>::getElementTypeBitWidth;
- // using mlir::ShapedType::Trait<MatrixDescType>::getElementTypeBitWidth;
- // using mlir::ShapedType::Trait<MatrixDescType>::getRank;
- // using mlir::ShapedType::Trait<MatrixDescType>::getNumElements;
- // using mlir::ShapedType::Trait<MatrixDescType>::isDynamicDim;
- // using mlir::ShapedType::Trait<MatrixDescType>::hasStaticShape;
+ bool hasRank() const { return true; }
+
+ MatrixDescType cloneWith(std::optional<llvm::ArrayRef<int64_t>> shape, Type elementType) const {
+ return MatrixDescType::get(getContext(), shape.value_or(getShape()), elementType, getLayout());
+ }
}];
let hasCustomAssemblyFormat = true;
From cb0a195e340bac10e10b6d5cb9de0d925d39deeb Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 12 Aug 2025 18:10:33 +0000
Subject: [PATCH 03/10] add unit tests for create_matrix_desc
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 24 +++++++++++++------
mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 1 +
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 10 ++++++++
mlir/test/Dialect/XeGPU/invalid.mlir | 16 +++++++++++++
mlir/test/Dialect/XeGPU/ops.mlir | 18 ++++++++++++++
5 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 37e4c2c811155..e4ea0b27323ec 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1101,21 +1101,31 @@ def XeGPU_ConvertLayoutOp: XeGPU_Op<"convert_layout", [Pure, AllTypesMatch<["sou
let hasCanonicalizer = 1;
}
-def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure]> {
+def isSharedPred : CPred<"isSharedMemory(llvm::cast<mlir::MemRefType>($_self))">;
+class StaticShared1DMemRefOf<list<Type> allowedTypes> :
+ ConfinedType<MemRefRankOf<allowedTypes, [1]>, [HasStaticShapePred, isSharedPred],
+ "statically shaped " # MemRefOf<allowedTypes>.summary # " for shared memory",
+ "mlir::MemRefType">;
+
+class SizeInBits<string name> :
+ StrFunc<"llvm::cast<mlir::ShapedType>($" # name # ".getType()).getNumElements()"
+ "*llvm::cast<mlir::ShapedType>($" # name # ".getType()).getElementTypeBitWidth()">;
+class AllMemSizesMatch<list<string> names> :
+ AllMatchSameOperatorTrait<names, SizeInBits<"_self">.result,
+ "size in bits">;
+
+def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure,
+ AllMemSizesMatch<["source", "matrix_desc"]>]> {
let summary = "Create a matrix descriptor.";
let description = [{
- Matrices are treated as 2D units.
- In case the ROI rank is >2, the two fastest changing dimensions
- represent a 2D unit and other dimensions specify the multiple
- of these units that are stacked vertically.
Arguments:
- `source` : a base address of SLM allocation.
Results:
- `matrix_desc` : a descriptor for SLM allocation.
}];
- let arguments = (ins XeGPU_BaseAddrType:$source);
+ let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source);
let results = (outs XeGPU_MatrixDesc:$matrix_desc);
- let assemblyFormat = "$source prop-dict attr-dict `:` type($source) `->` type($matrix_desc)";
+ let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($matrix_desc))";
}
def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> {
diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
index 7c6a4f37db9af..603fb5d237544 100644
--- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
MLIRAffineUtils
MLIRArithUtils
MLIRDialectUtils
+ MLIRGPUDialect
MLIRIR
MLIRViewLikeInterface
MLIRVectorDialect
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2cd086feb5deb..ad4d8bd6e22cd 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
@@ -21,6 +22,15 @@
namespace mlir {
namespace xegpu {
+bool isSharedMemory(const MemRefType &memrefTy) {
+ Attribute attr = memrefTy.getMemorySpace();
+ if (auto intAttr = llvm::dyn_cast<IntegerAttr>(attr))
+ return intAttr.getInt() == 3;
+ if (auto memrefSpace = llvm::dyn_cast<MemorySpaceAttr>(attr))
+ return memrefSpace.getValue() == MemorySpace::SLM;
+ return gpu::GPUDialect::isWorkgroupMemoryAddressSpace(attr);
+}
+
template <typename T>
static std::string makeString(T array, bool breakline = false) {
std::string buf;
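
As a rough illustration of what `isSharedMemory` is meant to accept, the sketch below allocates SLM buffers with two memory-space encodings: the integer space `3` used by the tests in this patch, and the GPU dialect's workgroup address space checked via `isWorkgroupMemoryAddressSpace`. The function name is hypothetical; this is only a sketch of the intent, not part of the patch.

```mlir
// Hypothetical example: both memory spaces should be treated as shared local memory.
gpu.func @slm_buffers() {
  %a = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
  %b = memref.alloca() {alignment = 1024} : memref<2048xi8, #gpu.address_space<workgroup>>
  gpu.return
}
```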
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 44e15dd7cbb38..1cd817918a772 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -762,3 +762,19 @@ func.func @slice_attr_repeat_dim() {
return
}
+// -----
+func.func @create_matrix_desc_non_slm() {
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 1>
+ // expected-error at +1 {{operand #0 must be statically shaped memref of 8-bit signless integer values for shared memory}}
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 1> -> !xegpu.matrix_desc<16x64xf16>
+ return
+}
+
+// -----
+func.func @create_matrix_desc_mismatch_sizes() {
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
+ // expected-error at +1 {{failed to verify that all of {source, matrix_desc} have same size in bits}}
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x32xf16>
+ return
+}
+
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index 67c00f5a9cc2f..c224749031328 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -751,4 +751,22 @@ gpu.func @fence() {
gpu.return
}
+// CHECK-LABEL: gpu.func @create_matrix_desc({{.*}}) {
+gpu.func @create_matrix_desc() {
+ //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3>
+ //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16>
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16>
+ gpu.return
+}
+
+// CHECK-LABEL: gpu.func @create_matrix_desc_with_stride({{.*}}) {
+gpu.func @create_matrix_desc_with_stride() {
+ //CHECK: [[alloc:%.+]] = memref.alloca() {alignment = 1024 : i64} : memref<2048xi8, 3>
+ //CHECK: [[mdesc:%.+]] = xegpu.create_matrix_desc [[alloc]] : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
+ %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
+ %matrix_desc = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
+ gpu.return
+}
+
}
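
The `AllMemSizesMatch` constraint ties the descriptor's element count to the byte size of the backing SLM buffer. A quick sanity check of the sizes used in the tests above, written as a sketch with a hypothetical function name:

```mlir
// 16 x 64 f16 elements = 1024 elements x 16 bits = 16384 bits = 2048 bytes,
// so memref<2048xi8, 3> and !xegpu.matrix_desc<16x64xf16> match in size.
gpu.func @size_check() {
  %m = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
  %md = xegpu.create_matrix_desc %m : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16>
  gpu.return
}
```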
From 98871ccb013229593e8d169533ab3b03b136f687 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 12 Aug 2025 20:18:09 +0000
Subject: [PATCH 04/10] add unit test for load_matrix and store_matrix
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 31 ++++++-----
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 53 +++++++++++++++++++
mlir/test/Dialect/XeGPU/invalid.mlir | 28 ++++++++++
mlir/test/Dialect/XeGPU/ops.mlir | 29 ++++++++++
4 files changed, 129 insertions(+), 12 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index e4ea0b27323ec..461df6efb8528 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1128,16 +1128,18 @@ def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure,
let assemblyFormat = "$source prop-dict attr-dict `` `:` type($source) `->` qualified(type($matrix_desc))";
}
-def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> {
+def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
+ AllElementTypesMatch<["matrix_desc", "res"]>,
+ AllRanksMatch<["matrix_desc", "res"]>]> {
let arguments = (ins XeGPU_MatrixDesc:$matrix_desc,
Variadic<Index>: $offsets,
DenseI64ArrayAttr: $const_offsets,
- OptionalAttr<XeGPU_LayoutAttr>:$layout
+ OptionalAttr<LayoutTrait>:$layout
);
let results = (outs XeGPU_ValueType:$res);
let assemblyFormat = [{
$matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets)
- prop-dict attr-dict `:` functional-type(operands, results)
+ prop-dict attr-dict `` `:` type(operands) `->` type(results)
}];
let summary = "Load matrix from SLM.";
let description = [{
@@ -1158,23 +1160,27 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>]> {
}];
let builders = [
- // OpBuilder<(ins "Type":$res, "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutAttr": $layout)>,
+ OpBuilder<(ins "Type":$res, "TypedValue<MatrixDescType>": $matrix_desc,
+ "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutTrait": $layout)>,
];
let extraClassDeclaration = [{
SmallVector<OpFoldResult> getMixedOffsets() {
return getMixedValues(getConstOffsets(), getOffsets(), getContext());
}
}];
- // let hasVerifier = 1;
+
+ let hasVerifier = 1;
}
-def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix"> {
+def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
+ AllElementTypesMatch<["matrix_desc", "data"]>,
+ AllRanksMatch<["matrix_desc", "data"]>]> {
let arguments = (ins
XeGPU_MatrixDesc:$matrix_desc,
- XeGPU_ValueType:$data,
Variadic<Index>: $offsets,
DenseI64ArrayAttr: $const_offsets,
- OptionalAttr<XeGPU_LayoutAttr>:$layout
+ XeGPU_ValueType:$data,
+ OptionalAttr<LayoutTrait>:$layout
);
let assemblyFormat = [{
$matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets) `,` $data
@@ -1196,15 +1202,16 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix"> {
- `offsets` : Coordinates of the matrix where the data will be stored.
}];
let builders = [
- // OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "Value" : $data, "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutAttr": $layout)>,
+ OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets,
+ "Value" : $data, "LayoutTrait": $layout)>,
];
let extraClassDeclaration = [{
SmallVector<OpFoldResult> getMixedOffsets() {
- Builder b(getContext());
- return getMixedValues(getConstOffsets(), getOffsets(), b);
+ return getMixedValues(getConstOffsets(), getOffsets(), getContext());
}
}];
- // let hasVerifier = 1;
+
+ let hasVerifier = 1;
}
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index ad4d8bd6e22cd..2051d7030340e 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -935,6 +935,59 @@ void ConvertLayoutOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
patterns.add<FoldConvertLayoutOp>(context);
}
+//===----------------------------------------------------------------------===//
+// XeGPU_LoadMatrixOp
+//===----------------------------------------------------------------------===//
+void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res,
+ TypedValue<MatrixDescType> matrixDesc,
+ llvm::ArrayRef<OpFoldResult> offsets,
+ LayoutTrait layout) {
+ llvm::SmallVector<Value> dynamicOffsets;
+ llvm::SmallVector<int64_t> staticOffsets;
+
+ dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+ auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
+
+ build(builder, state, res, matrixDesc, dynamicOffsets, staticOffsetsAttr,
+ layout);
+}
+
+LogicalResult LoadMatrixOp::verify() {
+ ArrayRef<int64_t> valueShape = getRes().getType().getShape();
+ ArrayRef<int64_t> mdescShape = getMatrixDesc().getType().getShape();
+ if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ return emitOpError("result shape must not exceed matrix desc shape.");
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_StoreMatrixOp
+//===----------------------------------------------------------------------===//
+void StoreMatrixOp::build(OpBuilder &builder, OperationState &state,
+ TypedValue<MatrixDescType> matrixDesc,
+ llvm::ArrayRef<OpFoldResult> offsets, Value data,
+ LayoutTrait layout) {
+ llvm::SmallVector<Value> dynamicOffsets;
+ llvm::SmallVector<int64_t> staticOffsets;
+
+ dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+ auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
+
+ build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data,
+ layout);
+}
+
+LogicalResult StoreMatrixOp::verify() {
+ ArrayRef<int64_t> dataShape = getData().getType().getShape();
+ ArrayRef<int64_t> mdescShape = getMatrixDesc().getType().getShape();
+ if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ return emitOpError("data shape must not exceed matrix desc shape.");
+
+ return success();
+}
+
} // namespace xegpu
} // namespace mlir
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 1cd817918a772..2feb010d343a8 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -778,3 +778,31 @@ func.func @create_matrix_desc_mismatch_sizes() {
return
}
+// -----
+func.func @load_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error at +1 {{failed to verify that all of {matrix_desc, res} have same element type}}
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf32>
+ return
+}
+
+// -----
+func.func @load_matrix_desc_invalid_result_size(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error at +1 {{result shape must not exceed matrix desc shape}}
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<32x16xf16>
+ return
+}
+
+// -----
+func.func @store_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf32>) {
+ // expected-error at +1 {{failed to verify that all of {matrix_desc, data} have same element type}}
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf32>
+ return
+}
+
+// -----
+func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<32x32xf16>) {
+ // expected-error at +1 {{data shape must not exceed matrix desc shape}}
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<32x32xf16>
+ return
+}
+
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index c224749031328..cda8f0ac1bb40 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -769,4 +769,33 @@ gpu.func @create_matrix_desc_with_stride() {
gpu.return
}
+// CHECK: gpu.func @load_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>)
+gpu.func @load_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16>
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> vector<8x16xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>)
+gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16>
+ %data = xegpu.load_matrix %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> vector<8x16xf16>
+ gpu.return
+}
+
+
+// CHECK: gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>)
+gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16>
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>)
+gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16>
+ xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16>
+ gpu.return
+}
+
}
From 06eec6e51b755cbb13b62cfaa3ba2320e8bc3cb6 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 12 Aug 2025 20:33:56 +0000
Subject: [PATCH 05/10] refine description
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 42 ++++++++-----------
.../mlir/Dialect/XeGPU/IR/XeGPUTypes.td | 5 ++-
2 files changed, 20 insertions(+), 27 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 461df6efb8528..f536650e9d872 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1118,10 +1118,14 @@ def XeGPU_CreateMatrixDescOp: XeGPU_Op<"create_matrix_desc", [Pure,
AllMemSizesMatch<["source", "matrix_desc"]>]> {
let summary = "Create a matrix descriptor.";
let description = [{
+ Creates a matrix descriptor from a shared local memory (SLM) buffer.
+    The resulting matrix descriptor must have the same size (in bits) as the
+    underlying shared local memory buffer.
+
Arguments:
- - `source` : a base address of SLM allocation.
+ - `source` : a 1D statically shaped memref with element type i8, representing the raw SLM buffer.
Results:
- - `matrix_desc` : a descriptor for SLM allocation.
+ - `matrix_desc` : the matrix descriptor.
}];
let arguments = (ins StaticShared1DMemRefOf<[I8]>:$source);
let results = (outs XeGPU_MatrixDesc:$matrix_desc);
@@ -1141,22 +1145,16 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
$matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets)
prop-dict attr-dict `` `:` type(operands) `->` type(results)
}];
- let summary = "Load matrix from SLM.";
- let description = [{
- This operation loads a matrix from the SLM using the matrix descriptor.
- There are additional parameters and attributes that support loading, but they must only
- be specified for a work-item level operation.
- General rules:
- 1. Non-WI-level code must not specify optional attributes.
- 2. If the load uses `vector` semantics, all of the vector attributes must be specified.
- 3. If the load uses `array` semantics, all of the array attributes must be specified.
+ let description = [{
+ This operation reads a block of data from shared local memory (SLM)
+ using the provided matrix descriptor.
Arguments:
- - `matrix_desc` : a matrix descriptor (SLM allocation + matrix type).
- - `offsets` : Coordinates of the matrix to load.
+ - `matrix_desc`: the matrix descriptor identifying the SLM region.
+ - `offsets`: the coordinates within the matrix to read from.
Results:
- - `res` : loaded matrix elements.
+ - `res`: the matrix elements loaded from SLM.
}];
let builders = [
@@ -1186,20 +1184,14 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
$matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets) `,` $data
prop-dict attr-dict `:` type(operands)
}];
- let summary = "Store matrix from SLM.";
let description = [{
- This operation stores workitem's `data` fragment of the matrix to the SLM (`matrix_desc`).
-    There are additional parameters and attributes that support storing, but they must only
- be specified for a work-item level operation.
-
- General rules:
- 1. Non-WI-level code must not specify optional attributes.
- 2. If the store uses `vector` semantics, all of the vector attributes must be specified.
+ This operation writes the `data` fragment into the shared local memory region
+ identified by `matrix_desc`.
Arguments:
- - `matrix_desc` : a matrix descriptor.
- - `data` : data to be stored to the matrix.
- - `offsets` : Coordinates of the matrix where the data will be stored.
+ - `matrix_desc`: the matrix descriptor specifying the SLM region.
+ - `offsets`: the coordinates within the matrix where the data will be written.
+ - `data`: the values to be stored in the matrix.
}];
let builders = [
OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets,
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index f578fc8bc0735..02cabce82398b 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -204,8 +204,9 @@ def XeGPU_Nbarrier: XeGPUTypeDef<"Nbarrier", "nbarrier", [], "mlir::Type"> {
def XeGPU_MatrixDesc: XeGPUTypeDef<"MatrixDesc", "matrix_desc", [ShapedTypeInterface], "mlir::Type"> {
let summary = "MatrixDesc describing the data in SLM";
let description = [{
-    MatrixDesc describes an SLM region. Unless specified via the optional layout attribute,
- the data is stored contiguously in the region in row-major order by default.
+ MatrixDesc represents a block of data stored in shared local memory.
+ By default, unless a layout attribute is provided, the data is stored
+ contiguously in row-major order within the region.
}];
let parameters = (ins ArrayRefParameter<"int64_t">: $shape,
"mlir::Type": $elementType,
From 6df4291c7fcecccc233f0b9ffea67e5edaef5d9b Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 13 Aug 2025 00:02:35 +0000
Subject: [PATCH 06/10] add subview op
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 31 ++++++++++++++++++
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 32 ++++++++++++++++---
mlir/test/Dialect/XeGPU/invalid.mlir | 20 ++++++++++++
mlir/test/Dialect/XeGPU/ops.mlir | 14 ++++++++
4 files changed, 93 insertions(+), 4 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index f536650e9d872..0c8980bb04b2e 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1206,5 +1206,36 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
let hasVerifier = 1;
}
+def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOpInterface,
+ AllElementTypesMatch<["src", "res"]>,
+ AllRanksMatch<["src", "res"]>]> {
+ let description = [{
+ Create a subview of a matrix descriptor.
+ Results:
+ - `src` : a matrix descriptor.
+ - `offsets` : the coordinates within the matrix the subview will be created from.
+ }];
+ let arguments = (ins XeGPU_MatrixDesc:$src,
+ Variadic<Index>:$offsets,
+ DenseI64ArrayAttr:$const_offsets,
+ OptionalAttr<LayoutTrait>: $layout);
+ let results = (outs XeGPU_MatrixDesc:$res);
+ let assemblyFormat = [{$src `` custom<DynamicIndexList>($offsets, $const_offsets) prop-dict
+ attr-dict `` `:` qualified(type($src)) `->` qualified(type($res))}];
+ let builders = [
+ OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutTrait": $layout)>
+ ];
+
+ let extraClassDeclaration = [{
+ mlir::Value getViewSource() { return getSrc(); }
+
+ SmallVector<OpFoldResult> getMixedOffsets() {
+ return getMixedValues(getConstOffsets(), getOffsets(), getContext());
+ }
+ }];
+
+ let hasVerifier = 1;
+}
+
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2051d7030340e..a8ec058a12a93 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -944,10 +944,8 @@ void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res,
LayoutTrait layout) {
llvm::SmallVector<Value> dynamicOffsets;
llvm::SmallVector<int64_t> staticOffsets;
-
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
-
build(builder, state, res, matrixDesc, dynamicOffsets, staticOffsetsAttr,
layout);
}
@@ -970,10 +968,8 @@ void StoreMatrixOp::build(OpBuilder &builder, OperationState &state,
LayoutTrait layout) {
llvm::SmallVector<Value> dynamicOffsets;
llvm::SmallVector<int64_t> staticOffsets;
-
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
-
build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data,
layout);
}
@@ -988,6 +984,34 @@ LogicalResult StoreMatrixOp::verify() {
return success();
}
+//===----------------------------------------------------------------------===//
+// XeGPU_MatrixDescSubviewOp
+//===----------------------------------------------------------------------===//
+
+void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state,
+ Type resTy, Value src,
+ llvm::ArrayRef<OpFoldResult> offsets,
+ LayoutTrait layout) {
+ llvm::SmallVector<Value> dynamicOffsets;
+ llvm::SmallVector<int64_t> staticOffsets;
+ dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+ auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
+ build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr, layout);
+}
+
+LogicalResult MatrixDescSubviewOp::verify() {
+ ArrayRef<int64_t> srcShape = getSrc().getType().getShape();
+ ArrayRef<int64_t> resShape = getRes().getType().getShape();
+ if (llvm::any_of(llvm::zip_equal(resShape, srcShape),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ return emitOpError("result shape must not exceed source shape.");
+
+ if (getSrc().getType().getLayout() != getRes().getType().getLayout())
+ return emitOpError("result must inherit the source layout.");
+
+ return success();
+}
+
} // namespace xegpu
} // namespace mlir
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 2feb010d343a8..63945dab1ccc2 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -806,3 +806,23 @@ func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf
return
}
+// -----
+func.func @matrix_desc_subview_size_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error at +1 {{result shape must not exceed source shape}}
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<32x16xf16>
+ return
+}
+
+// -----
+func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) {
+ // expected-error at +1 {{result must inherit the source layout}}
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16>
+ return
+}
+
+// -----
+func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error at +1 {{failed to verify that all of {src, res} have same element type}}
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32>
+ return
+}
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index cda8f0ac1bb40..7bceda70dea9f 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -798,4 +798,18 @@ gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, str
gpu.return
}
+// CHECK: gpu.func @matrix_desc_subview([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>)
+gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16>
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>)
+gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) {
+ //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>>
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>>
+ gpu.return
+}
+
}
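
A sketch of how the new subview op is expected to compose with load_matrix, using the syntax shown in the tests above; the function name and offsets are illustrative only.

```mlir
// Take an 8x16 view at offset [8, 8], then load it back as a vector.
gpu.func @subview_then_load(%arg0: !xegpu.matrix_desc<16x64xf16>) {
  %view = xegpu.matrix_desc_subview %arg0[8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf16>
  %data = xegpu.load_matrix %view[0, 0] : !xegpu.matrix_desc<8x16xf16> -> vector<8x16xf16>
  gpu.return
}
```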
From e11c88db66366d3c61b158959f5418230ce2abbb Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 13 Aug 2025 13:57:59 +0000
Subject: [PATCH 07/10] address comments
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 6 ++++++
mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt | 1 +
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 3 +++
3 files changed, 10 insertions(+)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 0c8980bb04b2e..6d06464e204a6 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1153,6 +1153,9 @@ def XeGPU_LoadMatrixOp: XeGPU_Op<"load_matrix", [MemoryEffects<[MemRead]>,
Arguments:
- `matrix_desc`: the matrix descriptor identifying the SLM region.
- `offsets`: the coordinates within the matrix to read from.
+ - `layout`: [optional] An attribute for guiding distributions among
+ subgroups and/or work-items. It currently can accept either
+ LayoutAttr or SliceAttr.
Results:
- `res`: the matrix elements loaded from SLM.
}];
@@ -1192,6 +1195,9 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
- `matrix_desc`: the matrix descriptor specifying the SLM region.
- `offsets`: the coordinates within the matrix where the data will be written.
- `data`: the values to be stored in the matrix.
+ - `layout`: [optional] An attribute for guiding distributions among
+ subgroups and/or work-items. It currently can accept either
+ LayoutAttr or SliceAttr.
}];
let builders = [
OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets,
diff --git a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
index 603fb5d237544..7869a28dfed57 100644
--- a/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt
@@ -18,6 +18,7 @@ add_mlir_dialect_library(MLIRXeGPUDialect
MLIRArithUtils
MLIRDialectUtils
MLIRGPUDialect
+ MLIRXeVMDialect
MLIRIR
MLIRViewLikeInterface
MLIRVectorDialect
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index a8ec058a12a93..1157f21230485 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -8,6 +8,7 @@
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
@@ -28,6 +29,8 @@ bool isSharedMemory(const MemRefType &memrefTy) {
return intAttr.getInt() == 3;
if (auto memrefSpace = llvm::dyn_cast<MemorySpaceAttr>(attr))
return memrefSpace.getValue() == MemorySpace::SLM;
+ if (auto xevmSpace = llvm::dyn_cast<xevm::AddrSpaceAttr>(attr))
+ return xevmSpace.getValue() == xevm::AddrSpace::SHARED;
return gpu::GPUDialect::isWorkgroupMemoryAddressSpace(attr);
}
From 23380a923cd2c2073a66fd31b70c3650869dcf3b Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 13 Aug 2025 14:30:21 +0000
Subject: [PATCH 08/10] update doc
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 6d06464e204a6..112a18f0705ab 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1220,6 +1220,9 @@ def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOp
Results:
- `src` : a matrix descriptor.
- `offsets` : the coordinates within the matrix the subview will be created from.
+ - `layout`: [optional] An attribute for guiding distributions among
+ subgroups and/or work-items. It currently can accept either
+ LayoutAttr or SliceAttr.
}];
let arguments = (ins XeGPU_MatrixDesc:$src,
Variadic<Index>:$offsets,
From 9e3aa8d6631fe177fd17bfdb9fd48da2ef1d5072 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Wed, 13 Aug 2025 21:25:18 +0000
Subject: [PATCH 09/10] remove the layout attribute from the subview op
---
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 8 ++------
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 5 ++---
2 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 112a18f0705ab..9ae2eb0c2e178 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1220,19 +1220,15 @@ def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOp
Results:
- `src` : a matrix descriptor.
- `offsets` : the coordinates within the matrix the subview will be created from.
- - `layout`: [optional] An attribute for guiding distributions among
- subgroups and/or work-items. It currently can accept either
- LayoutAttr or SliceAttr.
}];
let arguments = (ins XeGPU_MatrixDesc:$src,
Variadic<Index>:$offsets,
- DenseI64ArrayAttr:$const_offsets,
- OptionalAttr<LayoutTrait>: $layout);
+ DenseI64ArrayAttr:$const_offsets);
let results = (outs XeGPU_MatrixDesc:$res);
let assemblyFormat = [{$src `` custom<DynamicIndexList>($offsets, $const_offsets) prop-dict
attr-dict `` `:` qualified(type($src)) `->` qualified(type($res))}];
let builders = [
- OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutTrait": $layout)>
+ OpBuilder<(ins "Type": $res, "Value":$src, "llvm::ArrayRef<OpFoldResult>": $offsets)>
];
let extraClassDeclaration = [{
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 1157f21230485..27fd6797fed39 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -993,13 +993,12 @@ LogicalResult StoreMatrixOp::verify() {
void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state,
Type resTy, Value src,
- llvm::ArrayRef<OpFoldResult> offsets,
- LayoutTrait layout) {
+ llvm::ArrayRef<OpFoldResult> offsets) {
llvm::SmallVector<Value> dynamicOffsets;
llvm::SmallVector<int64_t> staticOffsets;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
- build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr, layout);
+ build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr);
}
LogicalResult MatrixDescSubviewOp::verify() {
From af2c25f457f4a94a0e304196040c0484718d54ca Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Thu, 14 Aug 2025 00:06:01 +0000
Subject: [PATCH 10/10] refine subview op
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 27 ++++++++++---------
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 23 ++++++++++------
mlir/test/Dialect/XeGPU/invalid.mlir | 14 +++++++---
mlir/test/Dialect/XeGPU/ops.mlir | 15 ++++++++---
4 files changed, 52 insertions(+), 27 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 9ae2eb0c2e178..65f805d1efa93 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -1177,16 +1177,14 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
AllElementTypesMatch<["matrix_desc", "data"]>,
AllRanksMatch<["matrix_desc", "data"]>]> {
let arguments = (ins
+ XeGPU_ValueType:$data,
XeGPU_MatrixDesc:$matrix_desc,
Variadic<Index>: $offsets,
DenseI64ArrayAttr: $const_offsets,
- XeGPU_ValueType:$data,
OptionalAttr<LayoutTrait>:$layout
);
- let assemblyFormat = [{
- $matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets) `,` $data
- prop-dict attr-dict `:` type(operands)
- }];
+ let assemblyFormat = [{ $data `,` $matrix_desc `` custom<DynamicIndexList>($offsets, $const_offsets)
+ prop-dict attr-dict `` `:` type(operands)}];
let description = [{
This operation writes the `data` fragment into the shared local memory region
identified by `matrix_desc`.
@@ -1200,8 +1198,8 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
LayoutAttr or SliceAttr.
}];
let builders = [
- OpBuilder<(ins "TypedValue<MatrixDescType>": $matrix_desc, "llvm::ArrayRef<OpFoldResult>": $offsets,
- "Value" : $data, "LayoutTrait": $layout)>,
+ OpBuilder<(ins "Value" : $data, "TypedValue<MatrixDescType>": $matrix_desc,
+ "llvm::ArrayRef<OpFoldResult>": $offsets, "LayoutTrait": $layout)>,
];
let extraClassDeclaration = [{
SmallVector<OpFoldResult> getMixedOffsets() {
@@ -1212,14 +1210,19 @@ def XeGPU_StoreMatrixOp: XeGPU_Op<"store_matrix", [MemoryEffects<[MemWrite]>,
let hasVerifier = 1;
}
-def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview", [Pure, ViewLikeOpInterface,
- AllElementTypesMatch<["src", "res"]>,
- AllRanksMatch<["src", "res"]>]> {
+def XeGPU_MatrixDescSubviewOp: XeGPU_Op<"matrix_desc_subview",
+ [Pure, ViewLikeOpInterface, AllElementTypesMatch<["src", "res"]>]> {
let description = [{
- Create a subview of a matrix descriptor.
- Results:
+ Creates a subview of a matrix descriptor. The resulting matrix descriptor
+ may have a lower rank than the source, in which case the dimensions are left-aligned.
+
+ Arguments:
- `src` : a matrix descriptor.
- `offsets` : the coordinates within the matrix the subview will be created from.
+
+ Results:
+      - `res` : a matrix descriptor with an equal or smaller size.
+
}];
let arguments = (ins XeGPU_MatrixDesc:$src,
Variadic<Index>:$offsets,
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 27fd6797fed39..27a652663190d 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -965,15 +965,15 @@ LogicalResult LoadMatrixOp::verify() {
//===----------------------------------------------------------------------===//
// XeGPU_StoreMatrixOp
//===----------------------------------------------------------------------===//
-void StoreMatrixOp::build(OpBuilder &builder, OperationState &state,
+void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, Value data,
TypedValue<MatrixDescType> matrixDesc,
- llvm::ArrayRef<OpFoldResult> offsets, Value data,
+ llvm::ArrayRef<OpFoldResult> offsets,
LayoutTrait layout) {
llvm::SmallVector<Value> dynamicOffsets;
llvm::SmallVector<int64_t> staticOffsets;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
- build(builder, state, matrixDesc, dynamicOffsets, staticOffsetsAttr, data,
+ build(builder, state, data, matrixDesc, dynamicOffsets, staticOffsetsAttr,
layout);
}
@@ -1002,13 +1002,20 @@ void MatrixDescSubviewOp::build(OpBuilder &builder, OperationState &state,
}
LogicalResult MatrixDescSubviewOp::verify() {
- ArrayRef<int64_t> srcShape = getSrc().getType().getShape();
- ArrayRef<int64_t> resShape = getRes().getType().getShape();
- if (llvm::any_of(llvm::zip_equal(resShape, srcShape),
- [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ MatrixDescType srcTy = getSrc().getType();
+ MatrixDescType resTy = getRes().getType();
+ ArrayRef<int64_t> srcShape = srcTy.getShape();
+ ArrayRef<int64_t> resShape = resTy.getShape();
+
+ if (srcTy.getRank() < resTy.getRank())
+ return emitOpError("result rank must not exceed source rank.");
+
+ if (llvm::any_of(
+ llvm::zip_equal(resShape, srcShape.take_back(resShape.size())),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
return emitOpError("result shape must not exceed source shape.");
- if (getSrc().getType().getLayout() != getRes().getType().getLayout())
+ if (srcTy.getLayout() != resTy.getLayout())
return emitOpError("result must inherit the source layout.");
return success();
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 63945dab1ccc2..f2df1a3920e23 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -795,14 +795,14 @@ func.func @load_matrix_desc_invalid_result_size(%arg0: !xegpu.matrix_desc<16x64x
// -----
func.func @store_matrix_desc_mismatch_element_type(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf32>) {
// expected-error at +1 {{failed to verify that all of {matrix_desc, data} have same element type}}
- xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf32>
+ xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.matrix_desc<16x64xf16>
return
}
// -----
func.func @store_matrix_desc_invalid_data_size(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<32x32xf16>) {
// expected-error at +1 {{data shape must not exceed matrix desc shape}}
- xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<32x32xf16>
+ xegpu.store_matrix %arg1, %arg0[8, 8] : vector<32x32xf16>, !xegpu.matrix_desc<16x64xf16>
return
}
@@ -821,8 +821,16 @@ func.func @matrix_desc_subview_layout_mismatch(%arg0: !xegpu.matrix_desc<16x64xf
}
// -----
-func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+func.func @matrix_desc_subview_element_type_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) {
// expected-error at +1 {{failed to verify that all of {src, res} have same element type}}
%data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<8x16xf32>
return
}
+
+// -----
+func.func @matrix_desc_subview_rank_mismatch(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ // expected-error at +1 {{result rank must not exceed source rank}}
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<4x8x16xf16>
+ return
+}
+
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index 7bceda70dea9f..7a9657587070a 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -786,15 +786,15 @@ gpu.func @load_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, stri
// CHECK: gpu.func @store_matrix_desc([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>)
gpu.func @store_matrix_desc(%arg0: !xegpu.matrix_desc<16x64xf16>, %arg1: vector<16x16xf16>) {
- // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16>
- xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16>, vector<16x16xf16>
+ // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16>
+ xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16>
gpu.return
}
// CHECK: gpu.func @store_matrix_desc_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>)
gpu.func @store_matrix_desc_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, %arg1: vector<16x16xf16>) {
- // CHECK: xegpu.store_matrix [[ARG0]][8, 8], [[ARG1]] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16>
- xegpu.store_matrix %arg0[8, 8], %arg1: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>, vector<16x16xf16>
+ // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
+ xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>
gpu.return
}
@@ -805,6 +805,13 @@ gpu.func @matrix_desc_subview(%arg0: !xegpu.matrix_desc<16x64xf16>) {
gpu.return
}
+// CHECK: gpu.func @matrix_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16>)
+gpu.func @matrix_desc_subview_lower_rank(%arg0: !xegpu.matrix_desc<16x64xf16>) {
+ //CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16>
+ %data = xegpu.matrix_desc_subview %arg0[8, 8]: !xegpu.matrix_desc<16x64xf16> -> !xegpu.matrix_desc<16xf16>
+ gpu.return
+}
+
// CHECK: gpu.func @matrix_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>)
gpu.func @matrix_desc_subview_with_stride(%arg0: !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>>) {
//CHECK: xegpu.matrix_desc_subview [[ARG0]][8, 8] : !xegpu.matrix_desc<16x64xf16, strided<[1, 16]>> -> !xegpu.matrix_desc<8x16xf16, strided<[1, 16]>>
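
Putting the pieces together, a minimal end-to-end sketch in the final syntax of this series: allocate SLM, wrap it in a descriptor, store a fragment, and load it back. The function name, offsets, and shapes are illustrative only, and the snippet assumes a surrounding gpu.module.

```mlir
gpu.func @slm_roundtrip(%val: vector<16x16xf16>) {
  // 2048 bytes of SLM back a 16x64 f16 descriptor (sizes must match in bits).
  %slm = memref.alloca() {alignment = 1024} : memref<2048xi8, 3>
  %md = xegpu.create_matrix_desc %slm : memref<2048xi8, 3> -> !xegpu.matrix_desc<16x64xf16>
  // After the operand-order change above, store_matrix takes the data first.
  xegpu.store_matrix %val, %md[0, 0] : vector<16x16xf16>, !xegpu.matrix_desc<16x64xf16>
  %res = xegpu.load_matrix %md[0, 0] : !xegpu.matrix_desc<16x64xf16> -> vector<16x16xf16>
  gpu.return
}
```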