[Mlir-commits] [mlir] [MLIR][XeGPU][TransformOps] Remove obsolete transform ops (PR #187561)
Tuomas Kärnä
llvmlistbot at llvm.org
Fri Mar 20 02:27:47 PDT 2026
https://github.com/tkarna updated https://github.com/llvm/llvm-project/pull/187561
>From 846a2edbcbc40b042fbdb8b08876440c3936d0f1 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 17:05:59 +0200
Subject: [PATCH 1/8] xegpu transform ops: add get_load_op
---
.../XeGPU/TransformOps/XeGPUTransformOps.td | 18 +++++
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 31 ++++++++
mlir/python/mlir/dialects/transform/xegpu.py | 29 +++++++
mlir/test/Dialect/XeGPU/transform-ops.mlir | 79 +++++++++++++++++++
.../python/dialects/transform_xegpu_ext.py | 19 ++++-
5 files changed, 174 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index f7f45508b6a03..ad636b8b638f9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -34,6 +34,24 @@ def GetDescOp : Op<Transform_Dialect, "xegpu.get_desc_op", [
let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
}
+def GetLoadOp : Op<Transform_Dialect, "xegpu.get_load_op", [
+ DeclareOpInterfaceMethods<TransformOpInterface>,
+ NavigationTransformOpTrait, MemoryEffectsOpInterface
+]> {
+
+ let summary = "Get a handle to the load_nd op in producer chain of a value.";
+ let description = [{
+ Traces the producers of the given value until an `xegpu.load_nd` or
+ `xegpu.load` op is found. Returns a handle to it. Currently traces
+ producers by following only the first operand of producer ops.
+ }];
+
+ let arguments = (ins TransformValueHandleTypeInterface:$target);
+
+ let results = (outs TransformHandleTypeInterface:$loadNdHandle);
+ let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
+}
+
def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 39f9ae0bf1287..c520f0d4ae859 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -210,6 +210,37 @@ transform::GetDescOp::apply(transform::TransformRewriter &rewriter,
return DiagnosedSilenceableFailure::success();
}
+DiagnosedSilenceableFailure
+transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
+ transform::TransformResults &results,
+ transform::TransformState &state) {
+ auto targetValues = state.getPayloadValues(getTarget());
+ if (!llvm::hasSingleElement(targetValues)) {
+ return emitDefiniteFailure()
+ << "requires exactly one target value handle (got "
+ << llvm::range_size(targetValues) << ")";
+ }
+
+ Operation* loadOp = nullptr;
+ auto maybeLoadNdOp =
+ findProducerOfType<xegpu::LoadNdOp>(*targetValues.begin());
+ if (maybeLoadNdOp) {
+ loadOp = maybeLoadNdOp->getOperation();
+ } else {
+ auto maybeLoadOp = findProducerOfType<xegpu::LoadGatherOp>(*targetValues.begin());
+ if (maybeLoadOp) {
+ loadOp = maybeLoadOp->getOperation();
+ } else {
+ return emitSilenceableFailure(getLoc())
+ << "Could not find a matching xegpu.load_nd or xegpu.load op when walking the "
+ "producer chain of the first operand.";
+ }
+ }
+
+ results.set(llvm::cast<OpResult>(getResult()), {loadOp});
+ return DiagnosedSilenceableFailure::success();
+}
+
void transform::SetDescLayoutOp::build(OpBuilder &builder,
OperationState &result, Value target,
ArrayRef<OpFoldResult> mixedSgLayout,
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 782c9a3f242a0..b0fd0d322f436 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -51,6 +51,35 @@ def get_desc_op(
return GetDescOp(target, loc=loc, ip=ip).result
+@_ods_cext.register_operation(_Dialect, replace=True)
+class GetLoadOp(GetLoadOp):
+ """Specialization for GetLoadOp class."""
+
+ def __init__(
+ self,
+ target: Value,
+ *,
+ loc=None,
+ ip=None,
+ ):
+ load_nd_type = transform.AnyOpType.get()
+ super().__init__(
+ load_nd_type,
+ target,
+ loc=loc,
+ ip=ip,
+ )
+
+
+def get_load_op(
+ target: Value,
+ *,
+ loc=None,
+ ip=None,
+) -> OpResult:
+ return GetLoadOp(target, loc=loc, ip=ip).result
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class SetDescLayoutOp(SetDescLayoutOp):
"""Specialization for SetDescLayoutOp class."""
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 5bb1ab708e301..55faa369c5287 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -62,6 +62,85 @@ module attributes {transform.with_named_sequence} {
// -----
+// CHECK-LABEL: @get_load_op
+func.func @get_load_op(%arg0: memref<4096x4096xf16>) {
+ %c0 = arith.constant 0 : index
+ %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ // CHECK: xegpu.load_nd
+ // expected-remark @below {{found load_nd op}}
+ %1 = xegpu.load_nd %0[%c0, %c0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+ %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+ %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
+ transform.debug.emit_remark_at %2, "found load_nd op" : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: @get_load_op_c
+func.func @get_load_op_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+ %c32 = arith.constant 32 : index
+ %c4096 = arith.constant 4096 : index
+ %c0 = arith.constant 0 : index
+ %0 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
+ // expected-remark @below {{found load_nd op}}
+ %1 = xegpu.load_nd %0[%c0, %c0] : !xegpu.tensor_desc<256x256xf16> -> vector<256x256xf16>
+ %3 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ %4 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
+ %2 = scf.for %arg3 = %c0 to %c4096 step %c32 iter_args(%arg4 = %1) -> (vector<256x256xf16>) {
+ %5 = xegpu.load_nd %3[%c0, %arg3] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+ %6 = xegpu.load_nd %4[%arg3, %c0] : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
+ %7 = xegpu.dpas %5, %6, %arg4 : vector<256x32xf16>, vector<32x256xf16>, vector<256x256xf16> -> vector<256x256xf16>
+ scf.yield %7 : vector<256x256xf16>
+ }
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_operand %0[2] : (!transform.any_op) -> !transform.any_value
+ %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
+ transform.debug.emit_remark_at %2, "found load_nd op" : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: @get_load_op_1d
+func.func @get_load_op_1d(%arg0: memref<4096xf32>) {
+ %cst = arith.constant dense<true> : vector<256xi1>
+ %0 = vector.step : vector<256xindex>
+ %intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<4096xf32> -> index
+ %1 = arith.index_cast %intptr : index to i64
+ // CHECK: xegpu.load %1[%0]
+ // expected-remark @below {{found load op}}
+ %2 = xegpu.load %1[%0], %cst : i64, vector<256xindex>, vector<256xi1> -> vector<256xf32>
+ %3 = arith.extf %2 : vector<256xf32> to vector<256xf64>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+ %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
+ transform.debug.emit_remark_at %2, "found load op" : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
// CHECK-LABEL: @set_desc_layout
func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
// CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index 346e68eca9201..b4c96e66e8326 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -17,7 +17,7 @@ def run(f):
@run
-def getDescOpDefaultIndex():
+def getDescOp():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
@@ -27,10 +27,25 @@ def getDescOpDefaultIndex():
operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
desc_handle = xegpu.get_desc_op(operand)
transform.YieldOp()
- # CHECK-LABEL: TEST: getDescOpDefaultIndex
+ # CHECK-LABEL: TEST: getDescOp
# CHECK: transform.xegpu.get_desc_op %
+@run
+def getLoadOp():
+ sequence = transform.SequenceOp(
+ transform.FailurePropagationMode.Propagate,
+ [],
+ transform.OperationType.get("xegpu.dpas"),
+ )
+ with InsertionPoint(sequence.body):
+ operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
+ load_handle = xegpu.get_load_op(operand)
+ transform.YieldOp()
+ # CHECK-LABEL: TEST: getLoadOp
+ # CHECK: transform.xegpu.get_load_op %
+
+
@run
def setDescLayoutMinimal():
sequence = transform.SequenceOp(
>From 71a451e8cd4c65af6146cf3ae1df8dfea8a70263 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 18:22:10 +0200
Subject: [PATCH 2/8] rename set_op_layout_attr -> set_anchor_layout
---
.../XeGPU/TransformOps/XeGPUTransformOps.td | 31 ++-
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 38 +---
mlir/python/mlir/dialects/transform/xegpu.py | 18 +-
.../Dialect/XeGPU/transform-ops-invalid.mlir | 55 +-----
mlir/test/Dialect/XeGPU/transform-ops.mlir | 180 +++++-------------
.../python/dialects/transform_xegpu_ext.py | 86 +++------
6 files changed, 105 insertions(+), 303 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index ad636b8b638f9..c7c3e14ceb066 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -122,22 +122,21 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
}];
}
-def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
+def SetAnchorLayoutOp : Op<Transform_Dialect, "xegpu.set_anchor_layout", [
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
TransformOpInterface
]> {
- let summary = "Set xegpu.layout attribute of an op.";
+ let summary = "Set anchor layout of an op.";
let description = [{
- Sets the `xegpu.layout` attribute of an op. By default it sets the anchor
- layout for XeGPU ops that support it. If `result=true` or `operand=true`,
- it sets the `layout_result_{index}` or `layout_operand_{index}` attribute,
- respectively, applicable to any op. The target operand/result value is
- defined by the `index` argument. The layout is defined by the `sg_layout`,
- `sg_data` and optional `inst_data` attributes. If `slice_dims` is provided,
- the `xegpu.layout` attribute is wrapped in an `xegpu.slice<..., dims=slice_dims>`
- attribute.
+ Sets the `xegpu.layout` anchor layout for XeGPU ops that support it. The
+ target operand value can be set by the `index` argument (currently only
+ applicable to a DPAS op). The layout is defined by the `sg_layout`,
+ `sg_data` and optional `inst_data` and `order` attributes. If `slice_dims`
+ is provided, the `xegpu.layout` attribute is wrapped in an
+ `xegpu.slice<..., dims=slice_dims>` attribute. Emits a silenceable failure
+ if the target op does not support anchor layouts.
}];
let arguments = (ins TransformHandleTypeInterface:$target,
@@ -149,9 +148,7 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
- DefaultValuedAttr<UnitAttr, "false">:$result,
- DefaultValuedAttr<UnitAttr, "false">:$operand
+ DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims
);
let results = (outs);
@@ -162,14 +159,12 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
"ArrayRef<OpFoldResult>":$mixedSgData,
"ArrayRef<OpFoldResult>":$mixedInstData,
"ArrayRef<int32_t>":$order,
- "ArrayRef<int64_t>":$sliceDims,
- CArg<"bool", "false">:$result,
- CArg<"bool", "false">:$operand
+ "ArrayRef<int64_t>":$sliceDims
)>,
];
let assemblyFormat = [{
- $target (`result` $result^)? (`operand` $operand^)? (`index` `=` $index^)?
+ $target (`index` `=` $index^)?
`sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
`sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
(`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
@@ -197,8 +192,6 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
return getMixedValues(getStaticInstData(), getInstData(), b);
}
}];
-
- let hasVerifier = 1;
}
def SetGPULaunchThreadsOp
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index c520f0d4ae859..6802087610b41 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -320,11 +320,11 @@ void transform::SetDescLayoutOp::getEffects(
modifiesPayload(effects);
}
-void transform::SetOpLayoutAttrOp::build(
+void transform::SetAnchorLayoutOp::build(
OpBuilder &builder, OperationState &ostate, Value target, int64_t index,
ArrayRef<OpFoldResult> mixedSgLayout, ArrayRef<OpFoldResult> mixedSgData,
ArrayRef<OpFoldResult> mixedInstData, ArrayRef<int32_t> order,
- ArrayRef<int64_t> sliceDims, bool result, bool operand) {
+ ArrayRef<int64_t> sliceDims) {
SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
@@ -340,13 +340,11 @@ void transform::SetOpLayoutAttrOp::build(
/*static_sg_data=*/staticSgData,
/*static_inst_data=*/staticInstData,
/*order=*/order,
- /*slice_dims=*/sliceDims,
- /*result=*/result,
- /*operand=*/operand);
+ /*slice_dims=*/sliceDims);
}
DiagnosedSilenceableFailure
-transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
+transform::SetAnchorLayoutOp::apply(transform::TransformRewriter &rewriter,
transform::TransformResults &results,
transform::TransformState &state) {
auto targetOps = state.getPayloadOps(getTarget());
@@ -356,18 +354,7 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
}
Operation *target = *targetOps.begin();
- bool resultTarget = getResult();
- bool operandTarget = getOperand();
-
int64_t index = getIndex();
- if (resultTarget && index >= target->getNumResults()) {
- return emitSilenceableFailure(getLoc())
- << "Index exceeds the number of op results";
- }
- if (operandTarget && index >= target->getNumOperands()) {
- return emitSilenceableFailure(getLoc())
- << "Index exceeds the number of op operands";
- }
xegpu::LayoutAttr layoutAttr = nullptr;
auto status = getLayoutAttrFromOperands(
@@ -385,13 +372,7 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
}
// Set layout attribute
- if (resultTarget) {
- // op result
- xegpu::setDistributeLayoutAttr(target->getResult(index), layout);
- } else if (operandTarget) {
- // op operand
- xegpu::setDistributeLayoutAttr(target->getOpOperand(index), layout);
- } else if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
+ if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
// dpas op is a special case where layout needs to be set for A, B, and C
if (index == 0)
dpasOp.getProperties().layout_a = layout;
@@ -419,7 +400,7 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
return DiagnosedSilenceableFailure::success();
}
-void transform::SetOpLayoutAttrOp::getEffects(
+void transform::SetAnchorLayoutOp::getEffects(
::llvm::SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
onlyReadsHandle(getTargetMutable(), effects);
onlyReadsHandle(getSgLayoutMutable(), effects);
@@ -428,13 +409,6 @@ void transform::SetOpLayoutAttrOp::getEffects(
modifiesPayload(effects);
}
-LogicalResult transform::SetOpLayoutAttrOp::verify() {
- if (getResult() && getOperand()) {
- return emitOpError("Cannot set both result and operand simultaneously.");
- }
- return success();
-}
-
void transform::SetGPULaunchThreadsOp::build(
OpBuilder &builder, OperationState &ostate, Value target,
ArrayRef<OpFoldResult> mixedThreads) {
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index b0fd0d322f436..ea8a4fc052721 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -154,8 +154,8 @@ def set_desc_layout(
@_ods_cext.register_operation(_Dialect, replace=True)
-class SetOpLayoutAttrOp(SetOpLayoutAttrOp):
- """Specialization for SetOpLayoutAttrOp class."""
+class SetAnchorLayoutOp(SetAnchorLayoutOp):
+ """Specialization for SetAnchorLayoutOp class."""
def __init__(
self,
@@ -167,8 +167,6 @@ def __init__(
order: Optional[MixedInt] = None,
slice_dims: Optional[MixedInt] = None,
index: Optional[Union[int, Attribute]] = None,
- result: Optional[Union[bool, Attribute]] = None,
- operand: Optional[Union[bool, Attribute]] = None,
loc=None,
ip=None,
):
@@ -199,14 +197,12 @@ def __init__(
order=order,
slice_dims=slice_dims,
index=index,
- result=result,
- operand=operand,
loc=loc,
ip=ip,
)
-def set_op_layout_attr(
+def set_anchor_layout(
target: Union[Operation, Value],
sg_layout: MixedValues,
sg_data: MixedValues,
@@ -215,12 +211,10 @@ def set_op_layout_attr(
order: Optional[MixedInt] = None,
slice_dims: Optional[MixedInt] = None,
index: Optional[Union[int, Attribute]] = None,
- result: Optional[Union[bool, Attribute]] = None,
- operand: Optional[Union[bool, Attribute]] = None,
loc=None,
ip=None,
-) -> SetOpLayoutAttrOp:
- return SetOpLayoutAttrOp(
+) -> SetAnchorLayoutOp:
+ return SetAnchorLayoutOp(
target,
sg_layout,
sg_data,
@@ -228,8 +222,6 @@ def set_op_layout_attr(
order=order,
slice_dims=slice_dims,
index=index,
- result=result,
- operand=operand,
loc=loc,
ip=ip,
)
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index 2a147497a893b..4c1ad80131006 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -16,66 +16,27 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: @set_op_layout_attr_bad_result_index
-func.func @set_op_layout_attr_bad_result_index(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_multiple
+func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
+ %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // expected-error@below {{Index exceeds the number of op results}}
- transform.xegpu.set_op_layout_attr %0 result index = 1 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_bad_operand_index
-func.func @set_op_layout_attr_bad_operand_index(%arg0: memref<4096x4096xf16>) {
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // expected-error@below {{Index exceeds the number of op operands}}
- transform.xegpu.set_op_layout_attr %0 operand index = 1 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_multiple
-func.func @set_op_layout_attr_multiple(%arg0: memref<4096x4096xf16>) {
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
- %3 = arith.extf %2 : vector<256x32xf32> to vector<256x32xf64>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error@below {{Requires exactly one targetOp handle (got 2)}}
- transform.xegpu.set_op_layout_attr %0 operand sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
+ transform.xegpu.set_anchor_layout %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_not_anchor_op
-func.func @set_op_layout_attr_not_anchor_op(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_not_anchor_op
+func.func @set_anchor_layout_not_anchor_op(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
%2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32> // expected-note {{target op}}
@@ -86,7 +47,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error@below {{Cannot set anchor layout to op: arith.extf}}
- transform.xegpu.set_op_layout_attr %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
+ transform.xegpu.set_anchor_layout %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
transform.yield
}
}
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 55faa369c5287..2a2f778b26966 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -239,176 +239,94 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: @set_op_layout_attr_result_default
-func.func @set_op_layout_attr_result_default(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout
+func.func @set_anchor_layout(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ // CHECK: = xegpu.load_nd %0[0, 0]
+ // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- // CHECK: = arith.extf %1
- // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 result sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
+ %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_result_sg_param
-func.func @set_op_layout_attr_result_sg_param(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_param
+func.func @set_anchor_layout_param(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ // CHECK: = xegpu.load_nd %0[0, 0]
+ // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- // CHECK: = arith.extf %1
- // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
+ %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
%layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
- transform.xegpu.set_op_layout_attr %0 result sg_layout = [%layout0, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>
+ transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [%layout0, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_result_sg_param2
-func.func @set_op_layout_attr_result_sg_param2(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_param2
+func.func @set_anchor_layout_param2(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ // CHECK: = xegpu.load_nd %0[0, 0]
+ // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- // CHECK: = arith.extf %1
- // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
+ %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
%layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
%layout1 = transform.param.constant 4 : i64 -> !transform.param<i64>
- transform.xegpu.set_op_layout_attr %0 result sg_layout = [%layout0, %layout1] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>, !transform.param<i64>
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_result_slice
-func.func @set_op_layout_attr_result_slice(%arg0: vector<256xf16>) {
- // CHECK: = arith.extf
- // CHECK-SAME: {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>, dims = [0]>}
- %2 = arith.extf %arg0 : vector<256xf16> to vector<256xf32>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 result index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] slice_dims = [0] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_result_order
-func.func @set_op_layout_attr_result_order(%arg0: vector<256xf16>) {
- // CHECK: = arith.extf
- // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16], order = [0, 1]>}
- %2 = arith.extf %arg0 : vector<256xf16> to vector<256xf32>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 result index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [0, 1] : !transform.any_op
+ transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [%layout0, %layout1] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>, !transform.param<i64>
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_operand_minimal
-func.func @set_op_layout_attr_operand_minimal(%arg0: memref<4096x4096xf16>) {
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- // CHECK: = arith.extf %1
- // CHECK-SAME: {layout_operand_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64]>}
- %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 operand sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_operand1
-func.func @set_op_layout_attr_operand1(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>) {
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %3 = xegpu.load_nd %2[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- // CHECK: = arith.addf %1, %3
- // CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
- %6 = arith.addf %1, %3 : vector<256x32xf16>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.addf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 operand index = 1 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_anchor
-func.func @set_op_layout_attr_anchor(%arg0: memref<4096x4096xf16>) {
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- // CHECK: = xegpu.load_nd %0[0, 0]
- // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
- %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+// CHECK-LABEL: @set_anchor_layout_slice
+func.func @set_anchor_layout_slice(%arg0: memref<4096xf32>) {
+ // CHECK: = xegpu.load %1[%0]
+ // CHECK-SAME: <{layout = #xegpu.slice<#xegpu.layout<sg_layout = [8, 8], sg_data = [32, 32], inst_data = [8, 16]>, dims = [0]>}>
+ %cst = arith.constant dense<true> : vector<256xi1>
+ %0 = vector.step : vector<256xindex>
+ %intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<4096xf32> -> index
+ %1 = arith.index_cast %intptr : index to i64
+ %2 = xegpu.load %1[%0], %cst : i64, vector<256xindex>, vector<256xi1> -> vector<256xf32>
return
}
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
+ %0 = transform.structured.match ops{["xegpu.load"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] slice_dims = [0] : !transform.any_op
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_anchor_order
-func.func @set_op_layout_attr_anchor_order(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_order
+func.func @set_anchor_layout_order(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
// CHECK: = xegpu.load_nd %0[0, 0]
// CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16], order = [1, 0]>}>
@@ -419,8 +337,8 @@ func.func @set_op_layout_attr_anchor_order(%arg0: memref<4096x4096xf16>) {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [1, 0] : !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [1, 0] : !transform.any_op
transform.yield
}
}
@@ -428,8 +346,8 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: @set_op_layout_attr_anchor_dpas_a
-func.func @set_op_layout_attr_anchor_dpas_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_dpas_a
+func.func @set_anchor_layout_dpas_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
%2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
@@ -445,16 +363,16 @@ func.func @set_op_layout_attr_anchor_dpas_a(%arg0: memref<4096x4096xf16>, %arg1:
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_anchor_dpas_b
-func.func @set_op_layout_attr_anchor_dpas_b(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_dpas_b
+func.func @set_anchor_layout_dpas_b(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
%2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
@@ -470,16 +388,16 @@ func.func @set_op_layout_attr_anchor_dpas_b(%arg0: memref<4096x4096xf16>, %arg1:
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 index = 1 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [16, 16] : !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 index = 1 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [16, 16] : !transform.any_op
transform.yield
}
}
// -----
-// CHECK-LABEL: @set_op_layout_attr_anchor_dpas_c
-func.func @set_op_layout_attr_anchor_dpas_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_dpas_c
+func.func @set_anchor_layout_dpas_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
%1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
%2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
@@ -495,8 +413,8 @@ func.func @set_op_layout_attr_anchor_dpas_c(%arg0: memref<4096x4096xf16>, %arg1:
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
- transform.xegpu.set_op_layout_attr %0 index = 2 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 index = 2 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
transform.yield
}
}
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index b4c96e66e8326..efbe212609f97 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -120,50 +120,22 @@ def setDescLayoutOrder():
@run
-def setOpLayoutAttrOperandMinimal():
+def setAnchorLayout():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
- transform.OperationType.get("xegpu.dpas"),
- )
- with InsertionPoint(sequence.body):
- xegpu.set_op_layout_attr(
- sequence.bodyTarget,
- sg_layout=[6, 4],
- sg_data=[32, 16],
- operand=True,
- )
- transform.YieldOp()
- # CHECK-LABEL: TEST: setOpLayoutAttr
- # CHECK: transform.xegpu.set_op_layout_attr %
- # CHECK: operand
- # CHECK-NOT: index = 0
- # CHECK-NOT: result
- # CHECK: sg_layout = [6, 4]
- # CHECK: sg_data = [32, 16]
- # CHECK-NOT: inst_data
-
-
-@run
-def setOpLayoutAttrResult():
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [],
- transform.OperationType.get("xegpu.dpas"),
+ transform.OperationType.get("xegpu.load_nd"),
)
with InsertionPoint(sequence.body):
- xegpu.set_op_layout_attr(
+ xegpu.set_anchor_layout(
sequence.bodyTarget,
- index=0,
sg_layout=[6, 4],
sg_data=[32, 16],
inst_data=[8, 16],
- result=True,
)
transform.YieldOp()
- # CHECK-LABEL: TEST: setOpLayoutAttrResult
- # CHECK: transform.xegpu.set_op_layout_attr %
- # CHECK: result
+ # CHECK-LABEL: TEST: setAnchorLayout
+ # CHECK: transform.xegpu.set_anchor_layout %
# CHECK-NOT: index = 0
# CHECK: sg_layout = [6, 4]
# CHECK: sg_data = [32, 16]
@@ -171,85 +143,77 @@ def setOpLayoutAttrResult():
@run
-def setOpLayoutAttrResultSlice():
+def setAnchorLayoutDPAS():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
transform.OperationType.get("xegpu.dpas"),
)
with InsertionPoint(sequence.body):
- xegpu.set_op_layout_attr(
+ xegpu.set_anchor_layout(
sequence.bodyTarget,
- index=0,
+ index=1,
sg_layout=[6, 4],
sg_data=[32, 16],
inst_data=[8, 16],
- slice_dims=[0],
- result=True,
)
transform.YieldOp()
- # CHECK-LABEL: TEST: setOpLayoutAttrResultSlice
- # CHECK: transform.xegpu.set_op_layout_attr %
- # CHECK: result
- # CHECK-NOT: index = 0
+ # CHECK-LABEL: TEST: setAnchorLayoutDPAS
+ # CHECK: transform.xegpu.set_anchor_layout %
+ # CHECK: index = 1
# CHECK: sg_layout = [6, 4]
# CHECK: sg_data = [32, 16]
# CHECK: inst_data = [8, 16]
- # CHECK: slice_dims = [0]
@run
-def setOpLayoutAttrResultOrder():
+def setAnchorLayoutOrder():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
- transform.OperationType.get("xegpu.dpas"),
+ transform.OperationType.get("xegpu.load_nd"),
)
with InsertionPoint(sequence.body):
- xegpu.set_op_layout_attr(
+ xegpu.set_anchor_layout(
sequence.bodyTarget,
- index=0,
sg_layout=[6, 4],
sg_data=[32, 16],
inst_data=[8, 16],
- order=[0, 1],
- result=True,
+ order=[1, 0],
)
transform.YieldOp()
- # CHECK-LABEL: TEST: setOpLayoutAttrResultOrder
- # CHECK: transform.xegpu.set_op_layout_attr %
- # CHECK: result
+ # CHECK-LABEL: TEST: setAnchorLayoutOrder
+ # CHECK: transform.xegpu.set_anchor_layout %
# CHECK-NOT: index = 0
# CHECK: sg_layout = [6, 4]
# CHECK: sg_data = [32, 16]
# CHECK: inst_data = [8, 16]
- # CHECK: order = [0, 1]
+ # CHECK: order = [1, 0]
@run
-def setOpLayoutAttrAnchor():
+def setAnchorLayoutSlice():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
- transform.OperationType.get("xegpu.dpas"),
+ transform.OperationType.get("xegpu.load"),
)
with InsertionPoint(sequence.body):
- xegpu.set_op_layout_attr(
+ xegpu.set_anchor_layout(
sequence.bodyTarget,
- index=0,
sg_layout=[6, 4],
sg_data=[32, 16],
inst_data=[8, 16],
+ slice_dims=[0],
)
transform.YieldOp()
- # CHECK-LABEL: TEST: setOpLayoutAttrAnchor
- # CHECK: transform.xegpu.set_op_layout_attr %
- # CHECK-NOT: result
- # CHECK-NOT: operand
+ # CHECK-LABEL: TEST: setAnchorLayoutSlice
+ # CHECK: transform.xegpu.set_anchor_layout %
# CHECK-NOT: index = 0
# CHECK: sg_layout = [6, 4]
# CHECK: sg_data = [32, 16]
# CHECK: inst_data = [8, 16]
+ # CHECK: slice_dims = [0]
@run
>From 2d566d4de8aeec1afaa96539439a035e9a9447f5 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 18:40:46 +0200
Subject: [PATCH 3/8] remove get_desc_op
---
.../XeGPU/TransformOps/XeGPUTransformOps.td | 18 ------
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 23 -------
mlir/python/mlir/dialects/transform/xegpu.py | 29 ---------
mlir/test/Dialect/XeGPU/transform-ops.mlir | 62 -------------------
.../python/dialects/transform_xegpu_ext.py | 15 -----
5 files changed, 147 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index c7c3e14ceb066..03808ee091dfc 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -16,24 +16,6 @@ include "mlir/Dialect/Transform/IR/TransformTypes.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/OpBase.td"
-def GetDescOp : Op<Transform_Dialect, "xegpu.get_desc_op", [
- DeclareOpInterfaceMethods<TransformOpInterface>,
- NavigationTransformOpTrait, MemoryEffectsOpInterface
-]> {
-
- let summary = "Get a handle to the descriptor op of a value.";
- let description = [{
- Traces the producers of the given value until an `xegpu.create_nd_tdesc`
- descriptor op is found. Returns a handle to it. Currently traces
- producers by following only the first operand of producer ops.
- }];
-
- let arguments = (ins TransformValueHandleTypeInterface:$target);
-
- let results = (outs TransformHandleTypeInterface:$descHandle);
- let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
-}
-
def GetLoadOp : Op<Transform_Dialect, "xegpu.get_load_op", [
DeclareOpInterfaceMethods<TransformOpInterface>,
NavigationTransformOpTrait, MemoryEffectsOpInterface
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 6802087610b41..6f543be356527 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -187,29 +187,6 @@ setDescLayout(transform::TransformRewriter &rewriter,
return newDescOp;
}
-DiagnosedSilenceableFailure
-transform::GetDescOp::apply(transform::TransformRewriter &rewriter,
- transform::TransformResults &results,
- transform::TransformState &state) {
- auto targetValues = state.getPayloadValues(getTarget());
- if (!llvm::hasSingleElement(targetValues)) {
- return emitDefiniteFailure()
- << "requires exactly one target value handle (got "
- << llvm::range_size(targetValues) << ")";
- }
-
- auto maybeDescOp =
- findProducerOfType<xegpu::CreateNdDescOp>(*targetValues.begin());
- if (!maybeDescOp) {
- return emitSilenceableFailure(getLoc())
- << "Could not find a matching descriptor op when walking the "
- "producer chain of the first operand.";
- }
-
- results.set(llvm::cast<OpResult>(getResult()), {*maybeDescOp});
- return DiagnosedSilenceableFailure::success();
-}
-
DiagnosedSilenceableFailure
transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
transform::TransformResults &results,
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index ea8a4fc052721..3984a1fc52232 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -22,35 +22,6 @@
from typing import Union, Optional
-@_ods_cext.register_operation(_Dialect, replace=True)
-class GetDescOp(GetDescOp):
- """Specialization for GetDescOp class."""
-
- def __init__(
- self,
- target: Value,
- *,
- loc=None,
- ip=None,
- ):
- desc_type = transform.AnyOpType.get()
- super().__init__(
- desc_type,
- target,
- loc=loc,
- ip=ip,
- )
-
-
-def get_desc_op(
- target: Value,
- *,
- loc=None,
- ip=None,
-) -> OpResult:
- return GetDescOp(target, loc=loc, ip=ip).result
-
-
@_ods_cext.register_operation(_Dialect, replace=True)
class GetLoadOp(GetLoadOp):
"""Specialization for GetLoadOp class."""
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 2a2f778b26966..1c5dc8a2e0641 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -1,67 +1,5 @@
// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s
-// CHECK-LABEL: @get_desc_op_a
-func.func @get_desc_op_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
- %c32 = arith.constant 32 : index
- %c4096 = arith.constant 4096 : index
- %c0 = arith.constant 0 : index
- %0 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
- %1 = xegpu.load_nd %0[%c0, %c0] : !xegpu.tensor_desc<256x256xf16> -> vector<256x256xf16>
- // expected-remark @below {{found desc op}}
- %3 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %4 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
- %2 = scf.for %arg3 = %c0 to %c4096 step %c32 iter_args(%arg4 = %1) -> (vector<256x256xf16>) {
- %5 = xegpu.load_nd %3[%c0, %arg3] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %6 = xegpu.load_nd %4[%arg3, %c0] : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
- %7 = xegpu.dpas %5, %6, %arg4 : vector<256x32xf16>, vector<32x256xf16>, vector<256x256xf16> -> vector<256x256xf16>
- scf.yield %7 : vector<256x256xf16>
- }
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
- %2 = transform.xegpu.get_desc_op %1 : (!transform.any_value) -> !transform.any_op
- transform.debug.emit_remark_at %2, "found desc op" : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @get_desc_op_c
-func.func @get_desc_op_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
- %c32 = arith.constant 32 : index
- %c4096 = arith.constant 4096 : index
- %c0 = arith.constant 0 : index
- // expected-remark @below {{found desc op}}
- %0 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
- %1 = xegpu.load_nd %0[%c0, %c0] : !xegpu.tensor_desc<256x256xf16> -> vector<256x256xf16>
- %3 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %4 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
- %2 = scf.for %arg3 = %c0 to %c4096 step %c32 iter_args(%arg4 = %1) -> (vector<256x256xf16>) {
- %5 = xegpu.load_nd %3[%c0, %arg3] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %6 = xegpu.load_nd %4[%arg3, %c0] : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
- %7 = xegpu.dpas %5, %6, %arg4 : vector<256x32xf16>, vector<32x256xf16>, vector<256x256xf16> -> vector<256x256xf16>
- scf.yield %7 : vector<256x256xf16>
- }
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1 = transform.get_operand %0[2] : (!transform.any_op) -> !transform.any_value
- %2 = transform.xegpu.get_desc_op %1 : (!transform.any_value) -> !transform.any_op
- transform.debug.emit_remark_at %2, "found desc op" : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
// CHECK-LABEL: @get_load_op
func.func @get_load_op(%arg0: memref<4096x4096xf16>) {
%c0 = arith.constant 0 : index
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index efbe212609f97..e8e8b0c0077bd 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -16,21 +16,6 @@ def run(f):
return f
-@run
-def getDescOp():
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [],
- transform.OperationType.get("xegpu.dpas"),
- )
- with InsertionPoint(sequence.body):
- operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
- desc_handle = xegpu.get_desc_op(operand)
- transform.YieldOp()
- # CHECK-LABEL: TEST: getDescOp
- # CHECK: transform.xegpu.get_desc_op %
-
-
@run
def getLoadOp():
sequence = transform.SequenceOp(
>From 0542c482d6bf78e04b26a46565c37f98496a833e Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 18:46:39 +0200
Subject: [PATCH 4/8] remove set_desc_layout
---
.../XeGPU/TransformOps/XeGPUTransformOps.td | 70 ------------
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 101 ------------------
mlir/python/mlir/dialects/transform/xegpu.py | 73 -------------
.../Dialect/XeGPU/transform-ops-invalid.mlir | 16 ---
mlir/test/Dialect/XeGPU/transform-ops.mlir | 98 -----------------
.../python/dialects/transform_xegpu_ext.py | 73 -------------
6 files changed, 431 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index 03808ee091dfc..d5b699d49c7e1 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -34,76 +34,6 @@ def GetLoadOp : Op<Transform_Dialect, "xegpu.get_load_op", [
let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
}
-def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
- AttrSizedOperandSegments,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
- TransformOpInterface
-]> {
-
- let summary = "Set xegpu.layout attribute to a xegpu.create_nd_desc op result.";
- let description = [{
- Given an `xegpu.create_nd_desc` operation, this transform adds
- `xegpu.layout` attribute to the result tensor descriptor. The layout is
- defined by the `sg_layout`, and `sg_data` and optional `inst_data`
- attributes. If `slice_dims` is provided, the `xegpu.layout` attribute is
- wrapped in an `xegpu.slice<..., dims=slice_dims>` attribute. Returns a handle to
- the transformed op.
- }];
-
- let arguments = (ins
- TransformHandleTypeInterface:$target,
- Variadic<TransformAnyParamTypeOrAnyHandle>:$sg_layout,
- Variadic<TransformAnyParamTypeOrAnyHandle>:$sg_data,
- Variadic<TransformAnyParamTypeOrAnyHandle>:$inst_data,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
- DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
- DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims
- );
-
- let results = (outs TransformHandleTypeInterface:$transformed);
- let builders = [
- OpBuilder<(ins "Value":$target,
- "ArrayRef<OpFoldResult>":$mixedSgLayout,
- "ArrayRef<OpFoldResult>":$mixedSgData,
- "ArrayRef<OpFoldResult>":$mixedInstData,
- "ArrayRef<int32_t>":$order,
- "ArrayRef<int64_t>":$sliceDims
- )>,
- ];
-
- let assemblyFormat = [{
- $target
- `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
- `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
- (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
- (`order` `=` $order^)?
- (`slice_dims` `=` $slice_dims^)?
- attr-dict `:` functional-type(operands, results)
- }];
-
- let extraClassDeclaration = [{
- ::mlir::DiagnosedSilenceableFailure apply(
- ::mlir::transform::TransformRewriter &rewriter,
- ::mlir::transform::TransformResults &transformResults,
- ::mlir::transform::TransformState &state);
-
- ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgLayout() {
- Builder b(getContext());
- return getMixedValues(getStaticSgLayout(), getSgLayout(), b);
- }
- ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgData() {
- Builder b(getContext());
- return getMixedValues(getStaticSgData(), getSgData(), b);
- }
- ::llvm::SmallVector<::mlir::OpFoldResult> getMixedInstData() {
- Builder b(getContext());
- return getMixedValues(getStaticInstData(), getInstData(), b);
- }
- }];
-}
-
def SetAnchorLayoutOp : Op<Transform_Dialect, "xegpu.set_anchor_layout", [
AttrSizedOperandSegments,
DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 6f543be356527..5b66983774985 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -165,28 +165,6 @@ getLayoutAttrFromOperands(MLIRContext *ctx, transform::TransformState &state,
return DiagnosedSilenceableFailure::success();
}
-/// Replace xegpu.create_nd_desc op with a new one with the given layout.
-static xegpu::CreateNdDescOp
-setDescLayout(transform::TransformRewriter &rewriter,
- xegpu::CreateNdDescOp descOp,
- xegpu::DistributeLayoutAttr layout) {
- assert(descOp.getMixedOffsets().size() == 0 &&
- "create desc op with offsets is not supported");
- auto oldTensorDesc = descOp.getType();
- auto descType = xegpu::TensorDescType::get(
- oldTensorDesc.getShape(), oldTensorDesc.getElementType(),
- /*array_length=*/oldTensorDesc.getArrayLength(),
- /*boundary_check=*/oldTensorDesc.getBoundaryCheck(),
- /*memory_space=*/oldTensorDesc.getMemorySpace(),
- /*layout=*/layout);
-
- rewriter.setInsertionPointAfter(descOp);
- auto newDescOp = rewriter.replaceOpWithNewOp<xegpu::CreateNdDescOp>(
- descOp, descType, descOp.getSource(), descOp.getMixedSizes(),
- descOp.getMixedStrides());
- return newDescOp;
-}
-
DiagnosedSilenceableFailure
transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
transform::TransformResults &results,
@@ -218,85 +196,6 @@ transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
return DiagnosedSilenceableFailure::success();
}
-void transform::SetDescLayoutOp::build(OpBuilder &builder,
- OperationState &result, Value target,
- ArrayRef<OpFoldResult> mixedSgLayout,
- ArrayRef<OpFoldResult> mixedSgData,
- ArrayRef<OpFoldResult> mixedInstData,
- ArrayRef<int32_t> order,
- ArrayRef<int64_t> sliceDims) {
- SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
- SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
- dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
- dispatchIndexOpFoldResults(mixedSgData, dynamicSgData, staticSgData);
- dispatchIndexOpFoldResults(mixedInstData, dynamicInstData, staticInstData);
- build(builder, result, target.getType(),
- /*target=*/target,
- /*sg_layout=*/dynamicSgLayout,
- /*sg_data=*/dynamicSgData,
- /*inst_data=*/dynamicInstData,
- /*static_sg_layout=*/staticSgLayout,
- /*static_sg_data=*/staticSgData,
- /*static_inst_data=*/staticInstData,
- /*order=*/order,
- /*slice_dims=*/sliceDims);
-}
-
-DiagnosedSilenceableFailure
-transform::SetDescLayoutOp::apply(transform::TransformRewriter &rewriter,
- transform::TransformResults &results,
- transform::TransformState &state) {
- auto targetOps = state.getPayloadOps(getTarget());
- if (!llvm::hasSingleElement(targetOps)) {
- return emitDefiniteFailure() << "requires exactly one targetOp handle (got "
- << llvm::range_size(targetOps) << ")";
- }
- Operation *target = *targetOps.begin();
-
- xegpu::LayoutAttr layoutAttr = nullptr;
- auto status = getLayoutAttrFromOperands(
- getContext(), state, (*this), getMixedSgLayout(), getMixedSgData(),
- getMixedInstData(), getOrder(), layoutAttr);
- if (!status.succeeded())
- return status;
-
- xegpu::DistributeLayoutAttr layout = layoutAttr;
- auto sliceDims = getSliceDims();
- if (sliceDims.size() > 0) {
- // Wrap layoutAttr in a slice attribute.
- layout = xegpu::SliceAttr::get(
- getContext(), layout, DenseI64ArrayAttr::get(getContext(), sliceDims));
- }
-
- // For now only create_nd_desc op is supported.
- auto descOp = dyn_cast<xegpu::CreateNdDescOp>(target);
- if (!descOp) {
- auto diag = emitSilenceableFailure(getLoc())
- << "Expected a xegpu.create_nd_desc op, but got: "
- << target->getName();
- diag.attachNote(target->getLoc()) << "target op";
- return diag;
- }
-
- // Set layout attr in desc op's return type. Replaces old desc op.
- auto newdescOp = setDescLayout(rewriter, descOp, layout);
-
- // Map result handles.
- results.set(cast<OpResult>(getTransformed()), {newdescOp.getOperation()});
-
- return DiagnosedSilenceableFailure::success();
-}
-
-void transform::SetDescLayoutOp::getEffects(
- ::llvm::SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
- consumesHandle(getTargetMutable(), effects);
- onlyReadsHandle(getSgLayoutMutable(), effects);
- onlyReadsHandle(getSgDataMutable(), effects);
- onlyReadsHandle(getInstDataMutable(), effects);
- producesHandle(getOperation()->getOpResults(), effects);
- modifiesPayload(effects);
-}
-
void transform::SetAnchorLayoutOp::build(
OpBuilder &builder, OperationState &ostate, Value target, int64_t index,
ArrayRef<OpFoldResult> mixedSgLayout, ArrayRef<OpFoldResult> mixedSgData,
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 3984a1fc52232..00ffc65900e33 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -51,79 +51,6 @@ def get_load_op(
return GetLoadOp(target, loc=loc, ip=ip).result
-@_ods_cext.register_operation(_Dialect, replace=True)
-class SetDescLayoutOp(SetDescLayoutOp):
- """Specialization for SetDescLayoutOp class."""
-
- def __init__(
- self,
- target: Union[Operation, Value],
- sg_layout: MixedValues,
- sg_data: MixedValues,
- *,
- inst_data: Optional[MixedValues] = None,
- order: Optional[MixedInt] = None,
- slice_dims: Optional[MixedInt] = None,
- loc=None,
- ip=None,
- ):
- target_handle = _get_op_result_or_value(target)
- inst_data = [] if inst_data is None else inst_data
- (
- dynamic_sg_layout,
- static_sg_layout,
- _,
- ) = _dispatch_dynamic_index_list(sg_layout)
- (
- dynamic_sg_data,
- static_sg_data,
- _,
- ) = _dispatch_dynamic_index_list(sg_data)
- (
- dynamic_inst_data,
- static_inst_data,
- _,
- ) = _dispatch_dynamic_index_list(inst_data)
-
- super().__init__(
- target_handle.type,
- target_handle,
- dynamic_sg_layout,
- dynamic_sg_data,
- dynamic_inst_data,
- static_sg_layout=static_sg_layout,
- static_sg_data=static_sg_data,
- static_inst_data=static_inst_data,
- order=order,
- slice_dims=slice_dims,
- loc=loc,
- ip=ip,
- )
-
-
-def set_desc_layout(
- target: Union[Operation, Value],
- sg_layout: MixedValues,
- sg_data: MixedValues,
- *,
- inst_data: Optional[MixedValues] = None,
- order: Optional[MixedInt] = None,
- slice_dims: Optional[MixedInt] = None,
- loc=None,
- ip=None,
-) -> OpResult:
- return SetDescLayoutOp(
- target,
- sg_layout,
- sg_data,
- inst_data=inst_data,
- order=order,
- slice_dims=slice_dims,
- loc=loc,
- ip=ip,
- ).result
-
-
@_ods_cext.register_operation(_Dialect, replace=True)
class SetAnchorLayoutOp(SetAnchorLayoutOp):
"""Specialization for SetAnchorLayoutOp class."""
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index 4c1ad80131006..c6f4dc5fb6bf8 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -1,21 +1,5 @@
// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics
-func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
- %c32 = arith.constant 32 : index // expected-note {{target op}}
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // expected-error@below {{Expected a xegpu.create_nd_desc op, but got: arith.constant}}
- %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
// CHECK-LABEL: @set_anchor_layout_multiple
func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 1c5dc8a2e0641..f4fa2962f7b53 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -79,104 +79,6 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: @set_desc_layout
-func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
- // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
- // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false>
- // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_desc_layout %{{.*}}
- %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_minimal
-func.func @set_desc_layout_minimal(%arg0: memref<4096x4096xf16>) {
- // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
- // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_desc_layout %{{.*}}
- %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_param
-func.func @set_desc_layout_param(%arg0: memref<4096x4096xf16>) {
- // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
- // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_desc_layout %{{.*}}
- %layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
- %1 = transform.xegpu.set_desc_layout %0 sg_layout = [%layout0, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op, !transform.param<i64>) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_slice
-func.func @set_desc_layout_slice(%arg0: memref<4096xf16>) {
- // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
- // CHECK-SAME: #xegpu.slice<#xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>, dims = [0]>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096xf16> -> !xegpu.tensor_desc<256xf16>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_desc_layout %{{.*}}
- %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] slice_dims = [0] : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_order
-func.func @set_desc_layout_order(%arg0: memref<4096x4096xf16>) {
- // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
- // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false>
- // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16], order = [1, 0]>
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // CHECK: transform.xegpu.set_desc_layout %{{.*}}
- %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] order = [1, 0] : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
// CHECK-LABEL: @set_anchor_layout
func.func @set_anchor_layout(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index e8e8b0c0077bd..4eff766b81bb7 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -31,79 +31,6 @@ def getLoadOp():
# CHECK: transform.xegpu.get_load_op %
-@run
-def setDescLayoutMinimal():
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [],
- transform.OperationType.get("xegpu.create_nd_tdesc"),
- )
- with InsertionPoint(sequence.body):
- xegpu.set_desc_layout(sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16])
- transform.YieldOp()
- # CHECK-LABEL: TEST: setDescLayoutMinimal
- # CHECK: %0 = transform.xegpu.set_desc_layout %
- # CHECK: sg_layout = [6, 4]
- # CHECK: sg_data = [32, 16]
-
-
-@run
-def setDescLayoutInstData():
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [],
- transform.OperationType.get("xegpu.create_nd_tdesc"),
- )
- with InsertionPoint(sequence.body):
- xegpu.set_desc_layout(
- sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], inst_data=[8, 16]
- )
- transform.YieldOp()
- # CHECK-LABEL: TEST: setDescLayoutInstData
- # CHECK: %0 = transform.xegpu.set_desc_layout %
- # CHECK: sg_layout = [6, 4]
- # CHECK: sg_data = [32, 16]
- # CHECK: inst_data = [8, 16]
-
-
-@run
-def setDescLayoutSlice():
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [],
- transform.OperationType.get("xegpu.create_nd_tdesc"),
- )
- with InsertionPoint(sequence.body):
- xegpu.set_desc_layout(
- sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], slice_dims=[0]
- )
- transform.YieldOp()
- # CHECK-LABEL: TEST: setDescLayoutSlice
- # CHECK: %0 = transform.xegpu.set_desc_layout %
- # CHECK: sg_layout = [6, 4]
- # CHECK: sg_data = [32, 16]
- # CHECK: slice_dims = [0]
-
-
-@run
-def setDescLayoutOrder():
- sequence = transform.SequenceOp(
- transform.FailurePropagationMode.Propagate,
- [],
- transform.OperationType.get("xegpu.create_nd_tdesc"),
- )
- with InsertionPoint(sequence.body):
- xegpu.set_desc_layout(
- sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], order=[0, 1]
- )
- transform.YieldOp()
- # CHECK-LABEL: TEST: setDescLayoutOrder
- # CHECK: %0 = transform.xegpu.set_desc_layout %
- # CHECK: sg_layout = [6, 4]
- # CHECK: sg_data = [32, 16]
- # CHECK: order = [0, 1]
-
-
@run
def setAnchorLayout():
sequence = transform.SequenceOp(
>From 3120e1955c3eb246164a59fce9de93b140b7b8e5 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 20:32:30 +0200
Subject: [PATCH 5/8] set_anchor_layout accepts multiple handles
---
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 58 +++++++++----------
.../Dialect/XeGPU/transform-ops-invalid.mlir | 19 ------
mlir/test/Dialect/XeGPU/transform-ops.mlir | 23 ++++++++
3 files changed, 51 insertions(+), 49 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 5b66983774985..a0185dc7d65b0 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -224,14 +224,9 @@ transform::SetAnchorLayoutOp::apply(transform::TransformRewriter &rewriter,
transform::TransformResults &results,
transform::TransformState &state) {
auto targetOps = state.getPayloadOps(getTarget());
- if (!llvm::hasSingleElement(targetOps)) {
- return emitDefiniteFailure() << "Requires exactly one targetOp handle (got "
- << llvm::range_size(targetOps) << ")";
- }
- Operation *target = *targetOps.begin();
-
int64_t index = getIndex();
+ // Construct layout attribute.
xegpu::LayoutAttr layoutAttr = nullptr;
auto status = getLayoutAttrFromOperands(
getContext(), state, (*this), getMixedSgLayout(), getMixedSgData(),
@@ -247,31 +242,34 @@ transform::SetAnchorLayoutOp::apply(transform::TransformRewriter &rewriter,
getContext(), layout, DenseI64ArrayAttr::get(getContext(), sliceDims));
}
- // Set layout attribute
- if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
- // dpas op is a special case where layout needs to be set for A, B, and C
- if (index == 0)
- dpasOp.getProperties().layout_a = layout;
- else if (index == 1)
- dpasOp.getProperties().layout_b = layout;
- else if (index == 2)
- dpasOp.getProperties().layout_cd = layout;
- else {
- auto diag = emitSilenceableFailure(getLoc())
- << "Invalid index for setting dpas op layout: " << index;
- diag.attachNote(target->getLoc()) << "target op";
- return diag;
- }
- } else {
- // op's anchor layout.
- auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(target);
- if (!anchorOp) {
- auto diag = emitSilenceableFailure(getLoc())
- << "Cannot set anchor layout to op: " << target->getName();
- diag.attachNote(target->getLoc()) << "target op";
- return diag;
+ // Apply the layout to all target ops.
+ for (Operation *target : targetOps) {
+ // Set layout attribute
+ if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
+ // dpas op is a special case where layout needs to be set for A, B, and C
+ if (index == 0)
+ dpasOp.getProperties().layout_a = layout;
+ else if (index == 1)
+ dpasOp.getProperties().layout_b = layout;
+ else if (index == 2)
+ dpasOp.getProperties().layout_cd = layout;
+ else {
+ auto diag = emitSilenceableFailure(getLoc())
+ << "Invalid index for setting dpas op layout: " << index;
+ diag.attachNote(target->getLoc()) << "target op";
+ return diag;
+ }
+ } else {
+ // op's anchor layout.
+ auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(target);
+ if (!anchorOp) {
+ auto diag = emitSilenceableFailure(getLoc())
+ << "Cannot set anchor layout to op: " << target->getName();
+ diag.attachNote(target->getLoc()) << "target op";
+ return diag;
+ }
+ anchorOp.setAnchorLayout(layout);
}
- anchorOp.setAnchorLayout(layout);
}
return DiagnosedSilenceableFailure::success();
}
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index c6f4dc5fb6bf8..5c604f1ba12cf 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -1,24 +1,5 @@
// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics
-// CHECK-LABEL: @set_anchor_layout_multiple
-func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
- %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
- %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- // expected-error@below {{Requires exactly one targetOp handle (got 2)}}
- transform.xegpu.set_anchor_layout %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
// CHECK-LABEL: @set_anchor_layout_not_anchor_op
func.func @set_anchor_layout_not_anchor_op(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index f4fa2962f7b53..0ad598a6bcd2b 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -99,6 +99,29 @@ module attributes {transform.with_named_sequence} {
// -----
+// CHECK-LABEL: @set_anchor_layout_multiple
+func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
+ %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+ // CHECK: xegpu.prefetch_nd %0[0, 0]
+ // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
+ // CHECK: xegpu.prefetch_nd %0[16, 0]
+ // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
+ xegpu.prefetch_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16>
+ xegpu.prefetch_nd %0[16, 0] : !xegpu.tensor_desc<256x32xf16>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["xegpu.prefetch_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+ transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
// CHECK-LABEL: @set_anchor_layout_param
func.func @set_anchor_layout_param(%arg0: memref<4096x4096xf16>) {
%0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
>From d979b8572242cf2da87f5f6166d5abb785ab28b7 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 21:04:47 +0200
Subject: [PATCH 6/8] insert_prefetch op takes xegpu.load_nd op handle instead
of value
---
.../XeGPU/TransformOps/XeGPUTransformOps.td | 14 ++++----
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 24 +++++++------
mlir/python/mlir/dialects/transform/xegpu.py | 4 +--
.../Dialect/XeGPU/transform-ops-invalid.mlir | 3 +-
mlir/test/Dialect/XeGPU/transform-ops.mlir | 6 ++--
.../python/dialects/transform_xegpu_ext.py | 36 ++++++-------------
6 files changed, 38 insertions(+), 49 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index d5b699d49c7e1..2b11d8f8884ed 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -150,17 +150,15 @@ def InsertPrefetchOp : Op<Transform_Dialect, "xegpu.insert_prefetch", [
TransformOpInterface
]> {
- let summary = "Adds xegpu prefetch ops to matmul operand tiles.";
+ let summary = "Adds xegpu prefetch ops to a load op.";
let description = [{
- Given a target value (e.g., `vector`) residing in a `scf.for` loop, this
- transform finds the corresponding `xegpu.load_nd` op and inserts
- `xegpu.prefetch_nd` operations for the tile. The load op must reside within
- the `scf.for` loop. Number of prefetch steps is set by the `nb_prefetch`
- argument (default value is 1). Returns a handle to the created
- `xegpu.create_nd_desc` op.
+ Inserts `xegpu.prefetch_nd` operations for the given `xegpu.load_nd` op.
+ The load op must reside within the `scf.for` loop. Number of prefetch steps
+ is set by the `nb_prefetch` argument (default value is 1). Returns a handle
+ to the created `xegpu.create_nd_desc` op.
}];
- let arguments = (ins TransformValueHandleTypeInterface:$target,
+ let arguments = (ins TransformHandleTypeInterface:$target,
Optional<TransformAnyParamTypeOrAnyHandle>:$dynamic_nb_prefetch,
DefaultValuedOptionalAttr<I64Attr, "1">:$static_nb_prefetch
);
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index a0185dc7d65b0..608ed5c555420 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -350,12 +350,12 @@ DiagnosedSilenceableFailure
transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
transform::TransformResults &results,
transform::TransformState &state) {
- auto targetValues = state.getPayloadValues(getTarget());
- if (!llvm::hasSingleElement(targetValues))
+ auto targetOps = state.getPayloadOps(getTarget());
+ if (!llvm::hasSingleElement(targetOps))
return emitDefiniteFailure()
- << "requires exactly one target value handle (got "
- << llvm::range_size(targetValues) << ")";
- auto value = *targetValues.begin();
+ << "requires exactly one target op handle (got "
+ << llvm::range_size(targetOps) << ")";
+ auto target = *targetOps.begin();
int64_t nbPrefetch = getStaticNbPrefetch();
if (getDynamicNbPrefetch()) {
@@ -374,11 +374,13 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
return emitSilenceableFailure(getLoc())
<< "nb_prefetch must be a positive integer.";
- // Find load operation of the operand.
- auto maybeLoadOp = findProducerOfType<xegpu::LoadNdOp>(value);
- if (!maybeLoadOp)
- return emitSilenceableFailure(getLoc()) << "Could not find load op.";
- auto loadOp = *maybeLoadOp;
+ // Cast target to load op.
+ auto maybeLoadOp = dyn_cast<xegpu::LoadNdOp>(target);
+ if (!maybeLoadOp) {
+ return emitSilenceableFailure(getLoc()) << "Expected xegpu.load_nd op, got "
+ << target->getName();
+ }
+ auto loadOp = maybeLoadOp;
if (loadOp.getMixedOffsets().size() == 0) {
auto diag = emitSilenceableFailure(getLoc())
<< "Load op must have offsets.";
@@ -396,7 +398,7 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
}
// Find descriptor op.
- auto maybeDescOp = findProducerOfType<xegpu::CreateNdDescOp>(value);
+ auto maybeDescOp = findProducerOfType<xegpu::CreateNdDescOp>(loadOp.getResult());
if (!maybeDescOp)
return emitSilenceableFailure(getLoc()) << "Could not find descriptor op.";
auto descOp = *maybeDescOp;
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 00ffc65900e33..6e27e5c8ecfa6 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -168,7 +168,7 @@ class InsertPrefetchOp(InsertPrefetchOp):
def __init__(
self,
- target: Value,
+ target: Union[Operation, Value],
*,
nb_prefetch: Optional[MixedInt] = 1,
loc=None,
@@ -194,7 +194,7 @@ def __init__(
def insert_prefetch(
- target: Value,
+ target: Union[Operation, Value],
*,
nb_prefetch: Optional[MixedInt] = 1,
loc=None,
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index 5c604f1ba12cf..ba259f311d76e 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -95,8 +95,9 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["xegpu.dpas"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%1 = transform.get_operand %0[2] : (!transform.any_op) -> !transform.any_value
+ %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
// expected-error@below {{Load op is not contained in a scf.for loop.}}
- %2 = transform.xegpu.insert_prefetch %1 nb_prefetch = 1 : (!transform.any_value) -> !transform.any_op
+ %3 = transform.xegpu.insert_prefetch %2 nb_prefetch = 1 : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 0ad598a6bcd2b..acba80d870253 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -372,8 +372,9 @@ module attributes {transform.with_named_sequence} {
%func = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%0 = transform.structured.match ops{["xegpu.dpas"]} in %func : (!transform.any_op) -> !transform.any_op
%1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+ %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
// CHECK: transform.xegpu.insert_prefetch %{{.*}}
- %2 = transform.xegpu.insert_prefetch %1 nb_prefetch = 1 : (!transform.any_value) -> !transform.any_op
+ %3 = transform.xegpu.insert_prefetch %2 nb_prefetch = 1 : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.canonicalization
} : !transform.any_op
@@ -419,9 +420,10 @@ module attributes {transform.with_named_sequence} {
%func = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
%0 = transform.structured.match ops{["xegpu.dpas"]} in %func : (!transform.any_op) -> !transform.any_op
%1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+ %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
%nb = transform.param.constant 2 : i64 -> !transform.param<i64>
// CHECK: transform.xegpu.insert_prefetch %{{.*}}
- %2 = transform.xegpu.insert_prefetch %1 nb_prefetch = %nb : (!transform.any_value, !transform.param<i64>) -> !transform.any_op
+ %3 = transform.xegpu.insert_prefetch %2 nb_prefetch = %nb : (!transform.any_op, !transform.param<i64>) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.canonicalization
} : !transform.any_op
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index 4eff766b81bb7..5d5db1919af14 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -144,21 +144,17 @@ def setGPULaunchThreadsOp():
@run
-def insertPrefetch0():
+def insertPrefetch():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
- transform.OperationType.get("xegpu.dpas"),
+ transform.OperationType.get("xegpu.load_nd"),
)
with InsertionPoint(sequence.body):
- operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
- xegpu.insert_prefetch(
- operand,
- )
+ xegpu.insert_prefetch(sequence.bodyTarget)
transform.YieldOp()
- # CHECK-LABEL: TEST: insertPrefetch0
- # CHECK: %[[OPR:.*]] = get_operand
- # CHECK: transform.xegpu.insert_prefetch %[[OPR]]
+ # CHECK-LABEL: TEST: insertPrefetch
+ # CHECK: transform.xegpu.insert_prefetch
@run
@@ -166,18 +162,13 @@ def insertPrefetchNbPrefetch():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
- transform.OperationType.get("xegpu.dpas"),
+ transform.OperationType.get("xegpu.load_nd"),
)
with InsertionPoint(sequence.body):
- operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
- xegpu.insert_prefetch(
- operand,
- nb_prefetch=2,
- )
+ xegpu.insert_prefetch(sequence.bodyTarget, nb_prefetch=2)
transform.YieldOp()
# CHECK-LABEL: TEST: insertPrefetchNbPrefetch
- # CHECK: %[[OPR:.*]] = get_operand
- # CHECK: transform.xegpu.insert_prefetch %[[OPR]]
+ # CHECK: transform.xegpu.insert_prefetch
# CHECK-SAME: nb_prefetch = 2
@@ -186,25 +177,20 @@ def insertPrefetchNbPrefetchParam():
sequence = transform.SequenceOp(
transform.FailurePropagationMode.Propagate,
[],
- transform.OperationType.get("xegpu.dpas"),
+ transform.OperationType.get("xegpu.load_nd"),
)
with InsertionPoint(sequence.body):
- operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
int32_t = IntegerType.get_signless(32)
param_int32_t = transform.ParamType.get(int32_t)
nb_param = transform.ParamConstantOp(
param_int32_t,
IntegerAttr.get(int32_t, 2),
)
- xegpu.insert_prefetch(
- operand,
- nb_prefetch=nb_param,
- )
+ xegpu.insert_prefetch(sequence.bodyTarget, nb_prefetch=nb_param)
transform.YieldOp()
# CHECK-LABEL: TEST: insertPrefetchNbPrefetchParam
- # CHECK: %[[OPR:.*]] = get_operand
# CHECK: %[[PARAM_OP:.*]] = transform.param.constant 2
- # CHECK: transform.xegpu.insert_prefetch %[[OPR]]
+ # CHECK: transform.xegpu.insert_prefetch
# CHECK-SAME: nb_prefetch = %[[PARAM_OP]]
>From 5ce1d3fb59dfe46ae0893fda94fd0c5a9ed9aab7 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 21:32:13 +0200
Subject: [PATCH 7/8] code formatting
---
.../XeGPU/TransformOps/XeGPUTransformOps.cpp | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 608ed5c555420..153ef5b500a1b 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -167,8 +167,8 @@ getLayoutAttrFromOperands(MLIRContext *ctx, transform::TransformState &state,
DiagnosedSilenceableFailure
transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
- transform::TransformResults &results,
- transform::TransformState &state) {
+ transform::TransformResults &results,
+ transform::TransformState &state) {
auto targetValues = state.getPayloadValues(getTarget());
if (!llvm::hasSingleElement(targetValues)) {
return emitDefiniteFailure()
@@ -176,18 +176,20 @@ transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
<< llvm::range_size(targetValues) << ")";
}
- Operation* loadOp = nullptr;
+ Operation *loadOp = nullptr;
auto maybeLoadNdOp =
findProducerOfType<xegpu::LoadNdOp>(*targetValues.begin());
if (maybeLoadNdOp) {
loadOp = maybeLoadNdOp->getOperation();
} else {
- auto maybeLoadOp = findProducerOfType<xegpu::LoadGatherOp>(*targetValues.begin());
+ auto maybeLoadOp =
+ findProducerOfType<xegpu::LoadGatherOp>(*targetValues.begin());
if (maybeLoadOp) {
loadOp = maybeLoadOp->getOperation();
} else {
return emitSilenceableFailure(getLoc())
- << "Could not find a matching xegpu.load_nd or xegpu.load op when walking the "
+ << "Could not find a matching xegpu.load_nd or xegpu.load op when "
+ "walking the "
"producer chain of the first operand.";
}
}
@@ -377,8 +379,8 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
// Cast target to load op.
auto maybeLoadOp = dyn_cast<xegpu::LoadNdOp>(target);
if (!maybeLoadOp) {
- return emitSilenceableFailure(getLoc()) << "Expected xegpu.load_nd op, got "
- << target->getName();
+ return emitSilenceableFailure(getLoc())
+ << "Expected xegpu.load_nd op, got " << target->getName();
}
auto loadOp = maybeLoadOp;
if (loadOp.getMixedOffsets().size() == 0) {
@@ -398,7 +400,8 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
}
// Find descriptor op.
- auto maybeDescOp = findProducerOfType<xegpu::CreateNdDescOp>(loadOp.getResult());
+ auto maybeDescOp =
+ findProducerOfType<xegpu::CreateNdDescOp>(loadOp.getResult());
if (!maybeDescOp)
return emitSilenceableFailure(getLoc()) << "Could not find descriptor op.";
auto descOp = *maybeDescOp;
>From 5916543120da6b44e2d754c3d1e79f6ae5d17b19 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Fri, 20 Mar 2026 11:27:13 +0200
Subject: [PATCH 8/8] update convert_layout docstring
---
.../mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index 2b11d8f8884ed..40b9136874e7c 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -196,9 +196,9 @@ def ConvertLayoutOp : Op<Transform_Dialect, "xegpu.convert_layout", [
let summary = "Convert xegpu.layout attribute for a value.";
let description = [{
Adds an `xegpu.convert_layout` op to convert the `xegpu.layout` attribute
- of a value. The input and target layouts are defined by the `*sg_layout`,
- `*sg_data` and optional `*inst_data` attributes. Returns a handle to the
- emitted `xegpu.convert_layout` op.
+ of a value before its first use. The input and target layouts are defined
+ by the `*sg_layout`, `*sg_data` and optional `*inst_data` and `*order`
+ attributes. Returns a handle to the emitted `xegpu.convert_layout` op.
}];
let arguments = (ins TransformValueHandleTypeInterface:$target,
More information about the Mlir-commits
mailing list