[Mlir-commits] [mlir] [MLIR][XeGPU][TransformOps] Remove obsolete transform ops (PR #187561)

Fri Mar 20 02:27:47 PDT 2026

https://github.com/tkarna updated https://github.com/llvm/llvm-project/pull/187561

>From 846a2edbcbc40b042fbdb8b08876440c3936d0f1 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 17:05:59 +0200
Subject: [PATCH 1/8] xegpu transform ops: add get_load_op

---
 .../XeGPU/TransformOps/XeGPUTransformOps.td   | 18 +++++
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 31 ++++++++
 mlir/python/mlir/dialects/transform/xegpu.py  | 29 +++++++
 mlir/test/Dialect/XeGPU/transform-ops.mlir    | 79 +++++++++++++++++++
 .../python/dialects/transform_xegpu_ext.py    | 19 ++++-
 5 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index f7f45508b6a03..ad636b8b638f9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -34,6 +34,24 @@ def GetDescOp : Op<Transform_Dialect, "xegpu.get_desc_op", [
   let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
 }
 
+def GetLoadOp : Op<Transform_Dialect, "xegpu.get_load_op", [
+  DeclareOpInterfaceMethods<TransformOpInterface>,
+  NavigationTransformOpTrait, MemoryEffectsOpInterface
+]> {
+
+  let summary = "Get a handle to the load_nd op in producer chain of a value.";
+  let description = [{
+    Traces the producers of the given value until an `xegpu.load_nd` or
+    `xegpu.load` op is found. Returns a handle to it. Currently traces
+    producers by following only the first operand of producer ops.
+  }];
+
+  let arguments = (ins TransformValueHandleTypeInterface:$target);
+
+  let results = (outs TransformHandleTypeInterface:$loadNdHandle);
+  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
+}
+
 def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
   AttrSizedOperandSegments,
   DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 39f9ae0bf1287..c520f0d4ae859 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -210,6 +210,37 @@ transform::GetDescOp::apply(transform::TransformRewriter &rewriter,
   return DiagnosedSilenceableFailure::success();
 }
 
+DiagnosedSilenceableFailure
+transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
+                              transform::TransformResults &results,
+                              transform::TransformState &state) {
+  auto targetValues = state.getPayloadValues(getTarget());
+  if (!llvm::hasSingleElement(targetValues)) {
+    return emitDefiniteFailure()
+           << "requires exactly one target value handle (got "
+           << llvm::range_size(targetValues) << ")";
+  }
+
+  Operation* loadOp = nullptr;
+  auto maybeLoadNdOp =
+      findProducerOfType<xegpu::LoadNdOp>(*targetValues.begin());
+  if (maybeLoadNdOp) {
+    loadOp = maybeLoadNdOp->getOperation();
+  } else {
+    auto maybeLoadOp = findProducerOfType<xegpu::LoadGatherOp>(*targetValues.begin());
+    if (maybeLoadOp) {
+      loadOp = maybeLoadOp->getOperation();
+    } else {
+      return emitSilenceableFailure(getLoc())
+            << "Could not find a matching xegpu.load_nd or xegpu.load op when walking the "
+                "producer chain of the first operand.";
+    }
+  }
+
+  results.set(llvm::cast<OpResult>(getResult()), {loadOp});
+  return DiagnosedSilenceableFailure::success();
+}
+
 void transform::SetDescLayoutOp::build(OpBuilder &builder,
                                        OperationState &result, Value target,
                                        ArrayRef<OpFoldResult> mixedSgLayout,
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 782c9a3f242a0..b0fd0d322f436 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -51,6 +51,35 @@ def get_desc_op(
     return GetDescOp(target, loc=loc, ip=ip).result
 
 
+@_ods_cext.register_operation(_Dialect, replace=True)
+class GetLoadOp(GetLoadOp):
+    """Specialization for GetLoadOp class."""
+
+    def __init__(
+        self,
+        target: Value,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        load_nd_type = transform.AnyOpType.get()
+        super().__init__(
+            load_nd_type,
+            target,
+            loc=loc,
+            ip=ip,
+        )
+
+
+def get_load_op(
+    target: Value,
+    *,
+    loc=None,
+    ip=None,
+) -> OpResult:
+    return GetLoadOp(target, loc=loc, ip=ip).result
+
+
 @_ods_cext.register_operation(_Dialect, replace=True)
 class SetDescLayoutOp(SetDescLayoutOp):
     """Specialization for SetDescLayoutOp class."""
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 5bb1ab708e301..55faa369c5287 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -62,6 +62,85 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: @get_load_op
+func.func @get_load_op(%arg0: memref<4096x4096xf16>) {
+  %c0 = arith.constant 0 : index
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  // CHECK: xegpu.load_nd
+  // expected-remark @below {{found load_nd op}}
+  %1 = xegpu.load_nd %0[%c0, %c0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+    %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
+    transform.debug.emit_remark_at %2, "found load_nd op" : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: @get_load_op_c
+func.func @get_load_op_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+  %c32 = arith.constant 32 : index
+  %c4096 = arith.constant 4096 : index
+  %c0 = arith.constant 0 : index
+  %0 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
+  // expected-remark @below {{found load_nd op}}
+  %1 = xegpu.load_nd %0[%c0, %c0]  : !xegpu.tensor_desc<256x256xf16> -> vector<256x256xf16>
+  %3 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  %4 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
+  %2 = scf.for %arg3 = %c0 to %c4096 step %c32 iter_args(%arg4 = %1) -> (vector<256x256xf16>) {
+    %5 = xegpu.load_nd %3[%c0, %arg3] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+    %6 = xegpu.load_nd %4[%arg3, %c0] : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
+    %7 = xegpu.dpas %5, %6, %arg4 : vector<256x32xf16>, vector<32x256xf16>, vector<256x256xf16> -> vector<256x256xf16>
+    scf.yield %7 : vector<256x256xf16>
+  }
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_operand %0[2] : (!transform.any_op) -> !transform.any_value
+    %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
+    transform.debug.emit_remark_at %2, "found load_nd op" : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: @get_load_op_1d
+func.func @get_load_op_1d(%arg0: memref<4096xf32>) {
+  %cst = arith.constant dense<true> : vector<256xi1>
+  %0 = vector.step : vector<256xindex>
+  %intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<4096xf32> -> index
+  %1 = arith.index_cast %intptr : index to i64
+  // CHECK: xegpu.load %1[%0]
+  // expected-remark @below {{found load op}}
+  %2 = xegpu.load %1[%0], %cst : i64, vector<256xindex>, vector<256xi1> -> vector<256xf32>
+  %3 = arith.extf %2 : vector<256xf32> to vector<256xf64>
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+    %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
+    transform.debug.emit_remark_at %2, "found load op" : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // CHECK-LABEL: @set_desc_layout
 func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
   // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index 346e68eca9201..b4c96e66e8326 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -17,7 +17,7 @@ def run(f):
 
 
 @run
-def getDescOpDefaultIndex():
+def getDescOp():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
@@ -27,10 +27,25 @@ def getDescOpDefaultIndex():
         operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
         desc_handle = xegpu.get_desc_op(operand)
         transform.YieldOp()
-    # CHECK-LABEL: TEST: getDescOpDefaultIndex
+    # CHECK-LABEL: TEST: getDescOp
     # CHECK: transform.xegpu.get_desc_op %
 
 
+@run
+def getLoadOp():
+    sequence = transform.SequenceOp(
+        transform.FailurePropagationMode.Propagate,
+        [],
+        transform.OperationType.get("xegpu.dpas"),
+    )
+    with InsertionPoint(sequence.body):
+        operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
+        load_handle = xegpu.get_load_op(operand)
+        transform.YieldOp()
+    # CHECK-LABEL: TEST: getLoadOp
+    # CHECK: transform.xegpu.get_load_op %
+
+
 @run
 def setDescLayoutMinimal():
     sequence = transform.SequenceOp(

>From 71a451e8cd4c65af6146cf3ae1df8dfea8a70263 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 18:22:10 +0200
Subject: [PATCH 2/8] rename set_op_layout_attr -> set_anchor_layout

---
 .../XeGPU/TransformOps/XeGPUTransformOps.td   |  31 ++-
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  |  38 +---
 mlir/python/mlir/dialects/transform/xegpu.py  |  18 +-
 .../Dialect/XeGPU/transform-ops-invalid.mlir  |  55 +-----
 mlir/test/Dialect/XeGPU/transform-ops.mlir    | 180 +++++-------------
 .../python/dialects/transform_xegpu_ext.py    |  86 +++------
 6 files changed, 105 insertions(+), 303 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index ad636b8b638f9..c7c3e14ceb066 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -122,22 +122,21 @@ def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
   }];
 }
 
-def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
+def SetAnchorLayoutOp : Op<Transform_Dialect, "xegpu.set_anchor_layout", [
   AttrSizedOperandSegments,
   DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
   TransformOpInterface
 ]> {
 
-  let summary = "Set xegpu.layout attribute of an op.";
+  let summary = "Set anchor layout of an op.";
   let description = [{
-    Sets the `xegpu.layout` attribute of an op. By default it sets the anchor
-    layout for XeGPU ops that support it. If `result=true` or `operand=true`,
-    it sets the `layout_result_{index}` or `layout_operand_{index}` attribute,
-    respectively, applicable to any op. The target operand/result value is
-    defined by the `index` argument. The layout is defined by the `sg_layout`,
-    `sg_data` and optional `inst_data` attributes. If `slice_dims` is provided,
-    the `xegpu.layout` attribute is wrapped in an `xegpu.slice<..., dims=slice_dims>`
-    attribute.
+    Sets the `xegpu.layout` anchor layout for XeGPU ops that support it. The
+    target operand value can be set by the `index` argument (currently only
+    applicable to a DPAS op). The layout is defined by the `sg_layout`,
+    `sg_data` and optional `inst_data` and `order` attributes. If `slice_dims`
+    is provided, the `xegpu.layout` attribute is wrapped in an
+    `xegpu.slice<..., dims=slice_dims>` attribute. Emits a silenceable failure
+    if the target op does not support anchor layouts.
   }];
 
   let arguments = (ins TransformHandleTypeInterface:$target,
@@ -149,9 +148,7 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
                    DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
                    DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims,
-                   DefaultValuedAttr<UnitAttr, "false">:$result,
-                   DefaultValuedAttr<UnitAttr, "false">:$operand
+                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims
                    );
 
   let results = (outs);
@@ -162,14 +159,12 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
                    "ArrayRef<OpFoldResult>":$mixedSgData,
                    "ArrayRef<OpFoldResult>":$mixedInstData,
                    "ArrayRef<int32_t>":$order,
-                   "ArrayRef<int64_t>":$sliceDims,
-                   CArg<"bool", "false">:$result,
-                   CArg<"bool", "false">:$operand
+                   "ArrayRef<int64_t>":$sliceDims
                    )>,
   ];
 
   let assemblyFormat = [{
-    $target (`result` $result^)? (`operand` $operand^)? (`index` `=` $index^)?
+    $target (`index` `=` $index^)?
     `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
     `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
     (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
@@ -197,8 +192,6 @@ def SetOpLayoutAttrOp : Op<Transform_Dialect, "xegpu.set_op_layout_attr", [
       return getMixedValues(getStaticInstData(), getInstData(), b);
     }
   }];
-
-  let hasVerifier = 1;
 }
 
 def SetGPULaunchThreadsOp
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index c520f0d4ae859..6802087610b41 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -320,11 +320,11 @@ void transform::SetDescLayoutOp::getEffects(
   modifiesPayload(effects);
 }
 
-void transform::SetOpLayoutAttrOp::build(
+void transform::SetAnchorLayoutOp::build(
     OpBuilder &builder, OperationState &ostate, Value target, int64_t index,
     ArrayRef<OpFoldResult> mixedSgLayout, ArrayRef<OpFoldResult> mixedSgData,
     ArrayRef<OpFoldResult> mixedInstData, ArrayRef<int32_t> order,
-    ArrayRef<int64_t> sliceDims, bool result, bool operand) {
+    ArrayRef<int64_t> sliceDims) {
   SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
   SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
   dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
@@ -340,13 +340,11 @@ void transform::SetOpLayoutAttrOp::build(
         /*static_sg_data=*/staticSgData,
         /*static_inst_data=*/staticInstData,
         /*order=*/order,
-        /*slice_dims=*/sliceDims,
-        /*result=*/result,
-        /*operand=*/operand);
+        /*slice_dims=*/sliceDims);
 }
 
 DiagnosedSilenceableFailure
-transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
+transform::SetAnchorLayoutOp::apply(transform::TransformRewriter &rewriter,
                                     transform::TransformResults &results,
                                     transform::TransformState &state) {
   auto targetOps = state.getPayloadOps(getTarget());
@@ -356,18 +354,7 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
   }
   Operation *target = *targetOps.begin();
 
-  bool resultTarget = getResult();
-  bool operandTarget = getOperand();
-
   int64_t index = getIndex();
-  if (resultTarget && index >= target->getNumResults()) {
-    return emitSilenceableFailure(getLoc())
-           << "Index exceeds the number of op results";
-  }
-  if (operandTarget && index >= target->getNumOperands()) {
-    return emitSilenceableFailure(getLoc())
-           << "Index exceeds the number of op operands";
-  }
 
   xegpu::LayoutAttr layoutAttr = nullptr;
   auto status = getLayoutAttrFromOperands(
@@ -385,13 +372,7 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
   }
 
   // Set layout attribute
-  if (resultTarget) {
-    // op result
-    xegpu::setDistributeLayoutAttr(target->getResult(index), layout);
-  } else if (operandTarget) {
-    // op operand
-    xegpu::setDistributeLayoutAttr(target->getOpOperand(index), layout);
-  } else if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
+  if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
     // dpas op is a special case where layout needs to be set for A, B, and C
     if (index == 0)
       dpasOp.getProperties().layout_a = layout;
@@ -419,7 +400,7 @@ transform::SetOpLayoutAttrOp::apply(transform::TransformRewriter &rewriter,
   return DiagnosedSilenceableFailure::success();
 }
 
-void transform::SetOpLayoutAttrOp::getEffects(
+void transform::SetAnchorLayoutOp::getEffects(
     ::llvm::SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
   onlyReadsHandle(getTargetMutable(), effects);
   onlyReadsHandle(getSgLayoutMutable(), effects);
@@ -428,13 +409,6 @@ void transform::SetOpLayoutAttrOp::getEffects(
   modifiesPayload(effects);
 }
 
-LogicalResult transform::SetOpLayoutAttrOp::verify() {
-  if (getResult() && getOperand()) {
-    return emitOpError("Cannot set both result and operand simultaneously.");
-  }
-  return success();
-}
-
 void transform::SetGPULaunchThreadsOp::build(
     OpBuilder &builder, OperationState &ostate, Value target,
     ArrayRef<OpFoldResult> mixedThreads) {
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index b0fd0d322f436..ea8a4fc052721 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -154,8 +154,8 @@ def set_desc_layout(
 
 
 @_ods_cext.register_operation(_Dialect, replace=True)
-class SetOpLayoutAttrOp(SetOpLayoutAttrOp):
-    """Specialization for SetOpLayoutAttrOp class."""
+class SetAnchorLayoutOp(SetAnchorLayoutOp):
+    """Specialization for SetAnchorLayoutOp class."""
 
     def __init__(
         self,
@@ -167,8 +167,6 @@ def __init__(
         order: Optional[MixedInt] = None,
         slice_dims: Optional[MixedInt] = None,
         index: Optional[Union[int, Attribute]] = None,
-        result: Optional[Union[bool, Attribute]] = None,
-        operand: Optional[Union[bool, Attribute]] = None,
         loc=None,
         ip=None,
     ):
@@ -199,14 +197,12 @@ def __init__(
             order=order,
             slice_dims=slice_dims,
             index=index,
-            result=result,
-            operand=operand,
             loc=loc,
             ip=ip,
         )
 
 
-def set_op_layout_attr(
+def set_anchor_layout(
     target: Union[Operation, Value],
     sg_layout: MixedValues,
     sg_data: MixedValues,
@@ -215,12 +211,10 @@ def set_op_layout_attr(
     order: Optional[MixedInt] = None,
     slice_dims: Optional[MixedInt] = None,
     index: Optional[Union[int, Attribute]] = None,
-    result: Optional[Union[bool, Attribute]] = None,
-    operand: Optional[Union[bool, Attribute]] = None,
     loc=None,
     ip=None,
-) -> SetOpLayoutAttrOp:
-    return SetOpLayoutAttrOp(
+) -> SetAnchorLayoutOp:
+    return SetAnchorLayoutOp(
         target,
         sg_layout,
         sg_data,
@@ -228,8 +222,6 @@ def set_op_layout_attr(
         order=order,
         slice_dims=slice_dims,
         index=index,
-        result=result,
-        operand=operand,
         loc=loc,
         ip=ip,
     )
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index 2a147497a893b..4c1ad80131006 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -16,66 +16,27 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_bad_result_index
-func.func @set_op_layout_attr_bad_result_index(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_multiple
+func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
   %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
+  %2 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
   return
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // expected-error@below {{Index exceeds the number of op results}}
-    transform.xegpu.set_op_layout_attr %0 result index = 1 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_bad_operand_index
-func.func @set_op_layout_attr_bad_operand_index(%arg0: memref<4096x4096xf16>) {
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // expected-error@below {{Index exceeds the number of op operands}}
-    transform.xegpu.set_op_layout_attr %0 operand index = 1 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_multiple
-func.func @set_op_layout_attr_multiple(%arg0: memref<4096x4096xf16>) {
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
-  %3 = arith.extf %2 : vector<256x32xf32> to vector<256x32xf64>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     // expected-error@below {{Requires exactly one targetOp handle (got 2)}}
-    transform.xegpu.set_op_layout_attr %0 operand sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
+    transform.xegpu.set_anchor_layout %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_not_anchor_op
-func.func @set_op_layout_attr_not_anchor_op(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_not_anchor_op
+func.func @set_anchor_layout_not_anchor_op(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
   %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
   %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32> // expected-note {{target op}}
@@ -86,7 +47,7 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
     // expected-error@below {{Cannot set anchor layout to op: arith.extf}}
-    transform.xegpu.set_op_layout_attr %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
+    transform.xegpu.set_anchor_layout %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
     transform.yield
   }
 }
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 55faa369c5287..2a2f778b26966 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -239,176 +239,94 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_result_default
-func.func @set_op_layout_attr_result_default(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout
+func.func @set_anchor_layout(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  // CHECK: = xegpu.load_nd %0[0, 0]
+  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
   %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  // CHECK: = arith.extf %1
-  // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
   return
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 result sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
+    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_result_sg_param
-func.func @set_op_layout_attr_result_sg_param(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_param
+func.func @set_anchor_layout_param(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  // CHECK: = xegpu.load_nd %0[0, 0]
+  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
   %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  // CHECK: = arith.extf %1
-  // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
   return
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
+    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
     %layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
-    transform.xegpu.set_op_layout_attr %0 result sg_layout = [%layout0, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>
+    transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [%layout0, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_result_sg_param2
-func.func @set_op_layout_attr_result_sg_param2(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_param2
+func.func @set_anchor_layout_param2(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  // CHECK: = xegpu.load_nd %0[0, 0]
+  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
   %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  // CHECK: = arith.extf %1
-  // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
   return
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
+    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
     %layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
     %layout1 = transform.param.constant 4 : i64 -> !transform.param<i64>
-    transform.xegpu.set_op_layout_attr %0 result sg_layout = [%layout0, %layout1] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>, !transform.param<i64>
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_result_slice
-func.func @set_op_layout_attr_result_slice(%arg0: vector<256xf16>) {
-  // CHECK: = arith.extf
-  // CHECK-SAME: {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>, dims = [0]>}
-  %2 = arith.extf %arg0 : vector<256xf16> to vector<256xf32>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 result index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] slice_dims = [0] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_result_order
-func.func @set_op_layout_attr_result_order(%arg0: vector<256xf16>) {
-  // CHECK: = arith.extf
-  // CHECK-SAME: {layout_result_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16], order = [0, 1]>}
-  %2 = arith.extf %arg0 : vector<256xf16> to vector<256xf32>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 result index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [0, 1] : !transform.any_op
+    transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [%layout0, %layout1] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op, !transform.param<i64>, !transform.param<i64>
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_operand_minimal
-func.func @set_op_layout_attr_operand_minimal(%arg0: memref<4096x4096xf16>) {
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  // CHECK: = arith.extf %1
-  // CHECK-SAME: {layout_operand_0 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64]>}
-  %2 = arith.extf %1 : vector<256x32xf16> to vector<256x32xf32>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.extf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 operand sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_operand1
-func.func @set_op_layout_attr_operand1(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>) {
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  %2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %3 = xegpu.load_nd %2[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  // CHECK: = arith.addf %1, %3
-  // CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}
-  %6 = arith.addf %1, %3 : vector<256x32xf16>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.addf"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 operand index = 1 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_op_layout_attr_anchor
-func.func @set_op_layout_attr_anchor(%arg0: memref<4096x4096xf16>) {
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  // CHECK: = xegpu.load_nd %0[0, 0]
-  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
-  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
+// CHECK-LABEL: @set_anchor_layout_slice
+func.func @set_anchor_layout_slice(%arg0: memref<4096xf32>) {
+  // CHECK: = xegpu.load %1[%0]
+  // CHECK-SAME: <{layout = #xegpu.slice<#xegpu.layout<sg_layout = [8, 8], sg_data = [32, 32], inst_data = [8, 16]>, dims = [0]>}>
+  %cst = arith.constant dense<true> : vector<256xi1>
+  %0 = vector.step : vector<256xindex>
+  %intptr = memref.extract_aligned_pointer_as_index %arg0 : memref<4096xf32> -> index
+  %1 = arith.index_cast %intptr : index to i64
+  %2 = xegpu.load %1[%0], %cst : i64, vector<256xindex>, vector<256xi1> -> vector<256xf32>
   return
 }
 
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
+    %0 = transform.structured.match ops{["xegpu.load"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] slice_dims = [0] : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_anchor_order
-func.func @set_op_layout_attr_anchor_order(%arg0: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_order
+func.func @set_anchor_layout_order(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
   // CHECK: = xegpu.load_nd %0[0, 0]
   // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16], order = [1, 0]>}>
@@ -419,8 +337,8 @@ func.func @set_op_layout_attr_anchor_order(%arg0: memref<4096x4096xf16>) {
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [1, 0] : !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] order = [1, 0] : !transform.any_op
     transform.yield
   }
 }
@@ -428,8 +346,8 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_anchor_dpas_a
-func.func @set_op_layout_attr_anchor_dpas_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_dpas_a
+func.func @set_anchor_layout_dpas_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
   %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
   %2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
@@ -445,16 +363,16 @@ func.func @set_op_layout_attr_anchor_dpas_a(%arg0: memref<4096x4096xf16>, %arg1:
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 index = 0 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_anchor_dpas_b
-func.func @set_op_layout_attr_anchor_dpas_b(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_dpas_b
+func.func @set_anchor_layout_dpas_b(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
   %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
   %2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
@@ -470,16 +388,16 @@ func.func @set_op_layout_attr_anchor_dpas_b(%arg0: memref<4096x4096xf16>, %arg1:
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 index = 1 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [16, 16] : !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 index = 1 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [16, 16] : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
-// CHECK-LABEL: @set_op_layout_attr_anchor_dpas_c
-func.func @set_op_layout_attr_anchor_dpas_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
+// CHECK-LABEL: @set_anchor_layout_dpas_c
+func.func @set_anchor_layout_dpas_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
   %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
   %2 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
@@ -495,8 +413,8 @@ func.func @set_op_layout_attr_anchor_dpas_c(%arg0: memref<4096x4096xf16>, %arg1:
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_op_layout_attr %{{.*}}
-    transform.xegpu.set_op_layout_attr %0 index = 2 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 index = 2 sg_layout = [8, 8] sg_data = [32, 32] inst_data = [8, 16] : !transform.any_op
     transform.yield
   }
 }
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index b4c96e66e8326..efbe212609f97 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -120,50 +120,22 @@ def setDescLayoutOrder():
 
 
 @run
-def setOpLayoutAttrOperandMinimal():
+def setAnchorLayout():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
-        transform.OperationType.get("xegpu.dpas"),
-    )
-    with InsertionPoint(sequence.body):
-        xegpu.set_op_layout_attr(
-            sequence.bodyTarget,
-            sg_layout=[6, 4],
-            sg_data=[32, 16],
-            operand=True,
-        )
-        transform.YieldOp()
-    # CHECK-LABEL: TEST: setOpLayoutAttr
-    # CHECK: transform.xegpu.set_op_layout_attr %
-    # CHECK: operand
-    # CHECK-NOT: index = 0
-    # CHECK-NOT: result
-    # CHECK: sg_layout = [6, 4]
-    # CHECK: sg_data = [32, 16]
-    # CHECK-NOT: inst_data
-
-
-@run
-def setOpLayoutAttrResult():
-    sequence = transform.SequenceOp(
-        transform.FailurePropagationMode.Propagate,
-        [],
-        transform.OperationType.get("xegpu.dpas"),
+        transform.OperationType.get("xegpu.load_nd"),
     )
     with InsertionPoint(sequence.body):
-        xegpu.set_op_layout_attr(
+        xegpu.set_anchor_layout(
             sequence.bodyTarget,
-            index=0,
             sg_layout=[6, 4],
             sg_data=[32, 16],
             inst_data=[8, 16],
-            result=True,
         )
         transform.YieldOp()
-    # CHECK-LABEL: TEST: setOpLayoutAttrResult
-    # CHECK: transform.xegpu.set_op_layout_attr %
-    # CHECK: result
+    # CHECK-LABEL: TEST: setAnchorLayout
+    # CHECK: transform.xegpu.set_anchor_layout %
     # CHECK-NOT: index = 0
     # CHECK: sg_layout = [6, 4]
     # CHECK: sg_data = [32, 16]
@@ -171,85 +143,77 @@ def setOpLayoutAttrResult():
 
 
 @run
-def setOpLayoutAttrResultSlice():
+def setAnchorLayoutDPAS():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
         transform.OperationType.get("xegpu.dpas"),
     )
     with InsertionPoint(sequence.body):
-        xegpu.set_op_layout_attr(
+        xegpu.set_anchor_layout(
             sequence.bodyTarget,
-            index=0,
+            index=1,
             sg_layout=[6, 4],
             sg_data=[32, 16],
             inst_data=[8, 16],
-            slice_dims=[0],
-            result=True,
         )
         transform.YieldOp()
-    # CHECK-LABEL: TEST: setOpLayoutAttrResultSlice
-    # CHECK: transform.xegpu.set_op_layout_attr %
-    # CHECK: result
-    # CHECK-NOT: index = 0
+    # CHECK-LABEL: TEST: setAnchorLayoutDPAS
+    # CHECK: transform.xegpu.set_anchor_layout %
+    # CHECK: index = 1
     # CHECK: sg_layout = [6, 4]
     # CHECK: sg_data = [32, 16]
     # CHECK: inst_data = [8, 16]
-    # CHECK: slice_dims = [0]
 
 
 @run
-def setOpLayoutAttrResultOrder():
+def setAnchorLayoutOrder():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
-        transform.OperationType.get("xegpu.dpas"),
+        transform.OperationType.get("xegpu.load_nd"),
     )
     with InsertionPoint(sequence.body):
-        xegpu.set_op_layout_attr(
+        xegpu.set_anchor_layout(
             sequence.bodyTarget,
-            index=0,
             sg_layout=[6, 4],
             sg_data=[32, 16],
             inst_data=[8, 16],
-            order=[0, 1],
-            result=True,
+            order=[1, 0],
         )
         transform.YieldOp()
-    # CHECK-LABEL: TEST: setOpLayoutAttrResultOrder
-    # CHECK: transform.xegpu.set_op_layout_attr %
-    # CHECK: result
+    # CHECK-LABEL: TEST: setAnchorLayoutOrder
+    # CHECK: transform.xegpu.set_anchor_layout %
     # CHECK-NOT: index = 0
     # CHECK: sg_layout = [6, 4]
     # CHECK: sg_data = [32, 16]
     # CHECK: inst_data = [8, 16]
-    # CHECK: order = [0, 1]
+    # CHECK: order = [1, 0]
 
 
 @run
-def setOpLayoutAttrAnchor():
+def setAnchorLayoutSlice():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
-        transform.OperationType.get("xegpu.dpas"),
+        transform.OperationType.get("xegpu.load"),
     )
     with InsertionPoint(sequence.body):
-        xegpu.set_op_layout_attr(
+        xegpu.set_anchor_layout(
             sequence.bodyTarget,
-            index=0,
             sg_layout=[6, 4],
             sg_data=[32, 16],
             inst_data=[8, 16],
+            slice_dims=[0],
         )
         transform.YieldOp()
-    # CHECK-LABEL: TEST: setOpLayoutAttrAnchor
-    # CHECK: transform.xegpu.set_op_layout_attr %
-    # CHECK-NOT: result
-    # CHECK-NOT: operand
+    # CHECK-LABEL: TEST: setAnchorLayoutSlice
+    # CHECK: transform.xegpu.set_anchor_layout %
     # CHECK-NOT: index = 0
     # CHECK: sg_layout = [6, 4]
     # CHECK: sg_data = [32, 16]
     # CHECK: inst_data = [8, 16]
+    # CHECK: slice_dims = [0]
 
 
 @run

>From 2d566d4de8aeec1afaa96539439a035e9a9447f5 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 18:40:46 +0200
Subject: [PATCH 3/8] remove get_desc_op

---
 .../XeGPU/TransformOps/XeGPUTransformOps.td   | 18 ------
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 23 -------
 mlir/python/mlir/dialects/transform/xegpu.py  | 29 ---------
 mlir/test/Dialect/XeGPU/transform-ops.mlir    | 62 -------------------
 .../python/dialects/transform_xegpu_ext.py    | 15 -----
 5 files changed, 147 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index c7c3e14ceb066..03808ee091dfc 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -16,24 +16,6 @@ include "mlir/Dialect/Transform/IR/TransformTypes.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/IR/OpBase.td"
 
-def GetDescOp : Op<Transform_Dialect, "xegpu.get_desc_op", [
-  DeclareOpInterfaceMethods<TransformOpInterface>,
-  NavigationTransformOpTrait, MemoryEffectsOpInterface
-]> {
-
-  let summary = "Get a handle to the descriptor op of a value.";
-  let description = [{
-    Traces the producers of the given value until an `xegpu.create_nd_tdesc`
-    descriptor op is found. Returns a handle to it. Currently traces
-    producers by following only the first operand of producer ops.
-  }];
-
-  let arguments = (ins TransformValueHandleTypeInterface:$target);
-
-  let results = (outs TransformHandleTypeInterface:$descHandle);
-  let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
-}
-
 def GetLoadOp : Op<Transform_Dialect, "xegpu.get_load_op", [
   DeclareOpInterfaceMethods<TransformOpInterface>,
   NavigationTransformOpTrait, MemoryEffectsOpInterface
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 6802087610b41..6f543be356527 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -187,29 +187,6 @@ setDescLayout(transform::TransformRewriter &rewriter,
   return newDescOp;
 }
 
-DiagnosedSilenceableFailure
-transform::GetDescOp::apply(transform::TransformRewriter &rewriter,
-                            transform::TransformResults &results,
-                            transform::TransformState &state) {
-  auto targetValues = state.getPayloadValues(getTarget());
-  if (!llvm::hasSingleElement(targetValues)) {
-    return emitDefiniteFailure()
-           << "requires exactly one target value handle (got "
-           << llvm::range_size(targetValues) << ")";
-  }
-
-  auto maybeDescOp =
-      findProducerOfType<xegpu::CreateNdDescOp>(*targetValues.begin());
-  if (!maybeDescOp) {
-    return emitSilenceableFailure(getLoc())
-           << "Could not find a matching descriptor op when walking the "
-              "producer chain of the first operand.";
-  }
-
-  results.set(llvm::cast<OpResult>(getResult()), {*maybeDescOp});
-  return DiagnosedSilenceableFailure::success();
-}
-
 DiagnosedSilenceableFailure
 transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
                               transform::TransformResults &results,
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index ea8a4fc052721..3984a1fc52232 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -22,35 +22,6 @@
 from typing import Union, Optional
 
 
-@_ods_cext.register_operation(_Dialect, replace=True)
-class GetDescOp(GetDescOp):
-    """Specialization for GetDescOp class."""
-
-    def __init__(
-        self,
-        target: Value,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        desc_type = transform.AnyOpType.get()
-        super().__init__(
-            desc_type,
-            target,
-            loc=loc,
-            ip=ip,
-        )
-
-
-def get_desc_op(
-    target: Value,
-    *,
-    loc=None,
-    ip=None,
-) -> OpResult:
-    return GetDescOp(target, loc=loc, ip=ip).result
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
 class GetLoadOp(GetLoadOp):
     """Specialization for GetLoadOp class."""
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 2a2f778b26966..1c5dc8a2e0641 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -1,67 +1,5 @@
 // RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s
 
-// CHECK-LABEL: @get_desc_op_a
-func.func @get_desc_op_a(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
-  %c32 = arith.constant 32 : index
-  %c4096 = arith.constant 4096 : index
-  %c0 = arith.constant 0 : index
-  %0 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
-  %1 = xegpu.load_nd %0[%c0, %c0]  : !xegpu.tensor_desc<256x256xf16> -> vector<256x256xf16>
-  // expected-remark @below {{found desc op}}
-  %3 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %4 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
-  %2 = scf.for %arg3 = %c0 to %c4096 step %c32 iter_args(%arg4 = %1) -> (vector<256x256xf16>) {
-    %5 = xegpu.load_nd %3[%c0, %arg3] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-    %6 = xegpu.load_nd %4[%arg3, %c0] : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
-    %7 = xegpu.dpas %5, %6, %arg4 : vector<256x32xf16>, vector<32x256xf16>, vector<256x256xf16> -> vector<256x256xf16>
-    scf.yield %7 : vector<256x256xf16>
-  }
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
-    %2 = transform.xegpu.get_desc_op %1 : (!transform.any_value) -> !transform.any_op
-    transform.debug.emit_remark_at %2, "found desc op" : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @get_desc_op_c
-func.func @get_desc_op_c(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
-  %c32 = arith.constant 32 : index
-  %c4096 = arith.constant 4096 : index
-  %c0 = arith.constant 0 : index
-  // expected-remark @below {{found desc op}}
-  %0 = xegpu.create_nd_tdesc %arg2 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x256xf16>
-  %1 = xegpu.load_nd %0[%c0, %c0]  : !xegpu.tensor_desc<256x256xf16> -> vector<256x256xf16>
-  %3 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %4 = xegpu.create_nd_tdesc %arg1 : memref<4096x4096xf16> -> !xegpu.tensor_desc<32x256xf16>
-  %2 = scf.for %arg3 = %c0 to %c4096 step %c32 iter_args(%arg4 = %1) -> (vector<256x256xf16>) {
-    %5 = xegpu.load_nd %3[%c0, %arg3] : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-    %6 = xegpu.load_nd %4[%arg3, %c0] : !xegpu.tensor_desc<32x256xf16> -> vector<32x256xf16>
-    %7 = xegpu.dpas %5, %6, %arg4 : vector<256x32xf16>, vector<32x256xf16>, vector<256x256xf16> -> vector<256x256xf16>
-    scf.yield %7 : vector<256x256xf16>
-  }
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %1 = transform.get_operand %0[2] : (!transform.any_op) -> !transform.any_value
-    %2 = transform.xegpu.get_desc_op %1 : (!transform.any_value) -> !transform.any_op
-    transform.debug.emit_remark_at %2, "found desc op" : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
 // CHECK-LABEL: @get_load_op
 func.func @get_load_op(%arg0: memref<4096x4096xf16>) {
   %c0 = arith.constant 0 : index
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index efbe212609f97..e8e8b0c0077bd 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -16,21 +16,6 @@ def run(f):
     return f
 
 
-@run
-def getDescOp():
-    sequence = transform.SequenceOp(
-        transform.FailurePropagationMode.Propagate,
-        [],
-        transform.OperationType.get("xegpu.dpas"),
-    )
-    with InsertionPoint(sequence.body):
-        operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
-        desc_handle = xegpu.get_desc_op(operand)
-        transform.YieldOp()
-    # CHECK-LABEL: TEST: getDescOp
-    # CHECK: transform.xegpu.get_desc_op %
-
-
 @run
 def getLoadOp():
     sequence = transform.SequenceOp(

>From 0542c482d6bf78e04b26a46565c37f98496a833e Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 18:46:39 +0200
Subject: [PATCH 4/8] remove set_desc_layout

---
 .../XeGPU/TransformOps/XeGPUTransformOps.td   |  70 ------------
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 101 ------------------
 mlir/python/mlir/dialects/transform/xegpu.py  |  73 -------------
 .../Dialect/XeGPU/transform-ops-invalid.mlir  |  16 ---
 mlir/test/Dialect/XeGPU/transform-ops.mlir    |  98 -----------------
 .../python/dialects/transform_xegpu_ext.py    |  73 -------------
 6 files changed, 431 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index 03808ee091dfc..d5b699d49c7e1 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -34,76 +34,6 @@ def GetLoadOp : Op<Transform_Dialect, "xegpu.get_load_op", [
   let assemblyFormat = "$target attr-dict `:` functional-type(operands, results)";
 }
 
-def SetDescLayoutOp : Op<Transform_Dialect, "xegpu.set_desc_layout", [
-  AttrSizedOperandSegments,
-  DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
-  TransformOpInterface
-]> {
-
-  let summary = "Set xegpu.layout attribute to a xegpu.create_nd_desc op result.";
-  let description = [{
-    Given an `xegpu.create_nd_desc` operation, this transform adds
-    `xegpu.layout` attribute to the result tensor descriptor. The layout is
-    defined by the `sg_layout`, and `sg_data` and optional `inst_data`
-    attributes. If `slice_dims` is provided, the `xegpu.layout` attribute is
-    wrapped in an `xegpu.slice<..., dims=slice_dims>` attribute. Returns a handle to
-    the transformed op.
-  }];
-
-  let arguments = (ins
-                   TransformHandleTypeInterface:$target,
-                   Variadic<TransformAnyParamTypeOrAnyHandle>:$sg_layout,
-                   Variadic<TransformAnyParamTypeOrAnyHandle>:$sg_data,
-                   Variadic<TransformAnyParamTypeOrAnyHandle>:$inst_data,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_layout,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_sg_data,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$static_inst_data,
-                   DefaultValuedOptionalAttr<DenseI32ArrayAttr, "{}">:$order,
-                   DefaultValuedOptionalAttr<DenseI64ArrayAttr, "{}">:$slice_dims
-                   );
-
-  let results = (outs TransformHandleTypeInterface:$transformed);
-  let builders = [
-    OpBuilder<(ins "Value":$target,
-                   "ArrayRef<OpFoldResult>":$mixedSgLayout,
-                   "ArrayRef<OpFoldResult>":$mixedSgData,
-                   "ArrayRef<OpFoldResult>":$mixedInstData,
-                   "ArrayRef<int32_t>":$order,
-                   "ArrayRef<int64_t>":$sliceDims
-                   )>,
-  ];
-
-  let assemblyFormat = [{
-    $target
-    `sg_layout` `=` custom<DynamicIndexList>($sg_layout, $static_sg_layout)
-    `sg_data` `=` custom<DynamicIndexList>($sg_data, $static_sg_data)
-    (`inst_data` `=` custom<DynamicIndexList>($inst_data, $static_inst_data)^)?
-    (`order` `=` $order^)?
-    (`slice_dims` `=` $slice_dims^)?
-    attr-dict `:` functional-type(operands, results)
-  }];
-
-  let extraClassDeclaration = [{
-    ::mlir::DiagnosedSilenceableFailure apply(
-        ::mlir::transform::TransformRewriter &rewriter,
-        ::mlir::transform::TransformResults &transformResults,
-        ::mlir::transform::TransformState &state);
-
-    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgLayout() {
-      Builder b(getContext());
-      return getMixedValues(getStaticSgLayout(), getSgLayout(), b);
-    }
-    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedSgData() {
-      Builder b(getContext());
-      return getMixedValues(getStaticSgData(), getSgData(), b);
-    }
-    ::llvm::SmallVector<::mlir::OpFoldResult> getMixedInstData() {
-      Builder b(getContext());
-      return getMixedValues(getStaticInstData(), getInstData(), b);
-    }
-  }];
-}
-
 def SetAnchorLayoutOp : Op<Transform_Dialect, "xegpu.set_anchor_layout", [
   AttrSizedOperandSegments,
   DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 6f543be356527..5b66983774985 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -165,28 +165,6 @@ getLayoutAttrFromOperands(MLIRContext *ctx, transform::TransformState &state,
   return DiagnosedSilenceableFailure::success();
 }
 
-/// Replace xegpu.create_nd_desc op with a new one with the given layout.
-static xegpu::CreateNdDescOp
-setDescLayout(transform::TransformRewriter &rewriter,
-              xegpu::CreateNdDescOp descOp,
-              xegpu::DistributeLayoutAttr layout) {
-  assert(descOp.getMixedOffsets().size() == 0 &&
-         "create desc op with offsets is not supported");
-  auto oldTensorDesc = descOp.getType();
-  auto descType = xegpu::TensorDescType::get(
-      oldTensorDesc.getShape(), oldTensorDesc.getElementType(),
-      /*array_length=*/oldTensorDesc.getArrayLength(),
-      /*boundary_check=*/oldTensorDesc.getBoundaryCheck(),
-      /*memory_space=*/oldTensorDesc.getMemorySpace(),
-      /*layout=*/layout);
-
-  rewriter.setInsertionPointAfter(descOp);
-  auto newDescOp = rewriter.replaceOpWithNewOp<xegpu::CreateNdDescOp>(
-      descOp, descType, descOp.getSource(), descOp.getMixedSizes(),
-      descOp.getMixedStrides());
-  return newDescOp;
-}
-
 DiagnosedSilenceableFailure
 transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
                               transform::TransformResults &results,
@@ -218,85 +196,6 @@ transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
   return DiagnosedSilenceableFailure::success();
 }
 
-void transform::SetDescLayoutOp::build(OpBuilder &builder,
-                                       OperationState &result, Value target,
-                                       ArrayRef<OpFoldResult> mixedSgLayout,
-                                       ArrayRef<OpFoldResult> mixedSgData,
-                                       ArrayRef<OpFoldResult> mixedInstData,
-                                       ArrayRef<int32_t> order,
-                                       ArrayRef<int64_t> sliceDims) {
-  SmallVector<int64_t> staticSgLayout, staticSgData, staticInstData;
-  SmallVector<Value> dynamicSgLayout, dynamicSgData, dynamicInstData;
-  dispatchIndexOpFoldResults(mixedSgLayout, dynamicSgLayout, staticSgLayout);
-  dispatchIndexOpFoldResults(mixedSgData, dynamicSgData, staticSgData);
-  dispatchIndexOpFoldResults(mixedInstData, dynamicInstData, staticInstData);
-  build(builder, result, target.getType(),
-        /*target=*/target,
-        /*sg_layout=*/dynamicSgLayout,
-        /*sg_data=*/dynamicSgData,
-        /*inst_data=*/dynamicInstData,
-        /*static_sg_layout=*/staticSgLayout,
-        /*static_sg_data=*/staticSgData,
-        /*static_inst_data=*/staticInstData,
-        /*order=*/order,
-        /*slice_dims=*/sliceDims);
-}
-
-DiagnosedSilenceableFailure
-transform::SetDescLayoutOp::apply(transform::TransformRewriter &rewriter,
-                                  transform::TransformResults &results,
-                                  transform::TransformState &state) {
-  auto targetOps = state.getPayloadOps(getTarget());
-  if (!llvm::hasSingleElement(targetOps)) {
-    return emitDefiniteFailure() << "requires exactly one targetOp handle (got "
-                                 << llvm::range_size(targetOps) << ")";
-  }
-  Operation *target = *targetOps.begin();
-
-  xegpu::LayoutAttr layoutAttr = nullptr;
-  auto status = getLayoutAttrFromOperands(
-      getContext(), state, (*this), getMixedSgLayout(), getMixedSgData(),
-      getMixedInstData(), getOrder(), layoutAttr);
-  if (!status.succeeded())
-    return status;
-
-  xegpu::DistributeLayoutAttr layout = layoutAttr;
-  auto sliceDims = getSliceDims();
-  if (sliceDims.size() > 0) {
-    // Wrap layoutAttr in a slice attribute.
-    layout = xegpu::SliceAttr::get(
-        getContext(), layout, DenseI64ArrayAttr::get(getContext(), sliceDims));
-  }
-
-  // For now only create_nd_desc op is supported.
-  auto descOp = dyn_cast<xegpu::CreateNdDescOp>(target);
-  if (!descOp) {
-    auto diag = emitSilenceableFailure(getLoc())
-                << "Expected a xegpu.create_nd_desc op, but got: "
-                << target->getName();
-    diag.attachNote(target->getLoc()) << "target op";
-    return diag;
-  }
-
-  // Set layout attr in desc op's return type. Replaces old desc op.
-  auto newdescOp = setDescLayout(rewriter, descOp, layout);
-
-  // Map result handles.
-  results.set(cast<OpResult>(getTransformed()), {newdescOp.getOperation()});
-
-  return DiagnosedSilenceableFailure::success();
-}
-
-void transform::SetDescLayoutOp::getEffects(
-    ::llvm::SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
-  consumesHandle(getTargetMutable(), effects);
-  onlyReadsHandle(getSgLayoutMutable(), effects);
-  onlyReadsHandle(getSgDataMutable(), effects);
-  onlyReadsHandle(getInstDataMutable(), effects);
-  producesHandle(getOperation()->getOpResults(), effects);
-  modifiesPayload(effects);
-}
-
 void transform::SetAnchorLayoutOp::build(
     OpBuilder &builder, OperationState &ostate, Value target, int64_t index,
     ArrayRef<OpFoldResult> mixedSgLayout, ArrayRef<OpFoldResult> mixedSgData,
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 3984a1fc52232..00ffc65900e33 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -51,79 +51,6 @@ def get_load_op(
     return GetLoadOp(target, loc=loc, ip=ip).result
 
 
-@_ods_cext.register_operation(_Dialect, replace=True)
-class SetDescLayoutOp(SetDescLayoutOp):
-    """Specialization for SetDescLayoutOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, Value],
-        sg_layout: MixedValues,
-        sg_data: MixedValues,
-        *,
-        inst_data: Optional[MixedValues] = None,
-        order: Optional[MixedInt] = None,
-        slice_dims: Optional[MixedInt] = None,
-        loc=None,
-        ip=None,
-    ):
-        target_handle = _get_op_result_or_value(target)
-        inst_data = [] if inst_data is None else inst_data
-        (
-            dynamic_sg_layout,
-            static_sg_layout,
-            _,
-        ) = _dispatch_dynamic_index_list(sg_layout)
-        (
-            dynamic_sg_data,
-            static_sg_data,
-            _,
-        ) = _dispatch_dynamic_index_list(sg_data)
-        (
-            dynamic_inst_data,
-            static_inst_data,
-            _,
-        ) = _dispatch_dynamic_index_list(inst_data)
-
-        super().__init__(
-            target_handle.type,
-            target_handle,
-            dynamic_sg_layout,
-            dynamic_sg_data,
-            dynamic_inst_data,
-            static_sg_layout=static_sg_layout,
-            static_sg_data=static_sg_data,
-            static_inst_data=static_inst_data,
-            order=order,
-            slice_dims=slice_dims,
-            loc=loc,
-            ip=ip,
-        )
-
-
-def set_desc_layout(
-    target: Union[Operation, Value],
-    sg_layout: MixedValues,
-    sg_data: MixedValues,
-    *,
-    inst_data: Optional[MixedValues] = None,
-    order: Optional[MixedInt] = None,
-    slice_dims: Optional[MixedInt] = None,
-    loc=None,
-    ip=None,
-) -> OpResult:
-    return SetDescLayoutOp(
-        target,
-        sg_layout,
-        sg_data,
-        inst_data=inst_data,
-        order=order,
-        slice_dims=slice_dims,
-        loc=loc,
-        ip=ip,
-    ).result
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
 class SetAnchorLayoutOp(SetAnchorLayoutOp):
     """Specialization for SetAnchorLayoutOp class."""
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index 4c1ad80131006..c6f4dc5fb6bf8 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -1,21 +1,5 @@
 // RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics
 
-func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
-  %c32 = arith.constant 32 : index // expected-note {{target op}}
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // expected-error@below {{Expected a xegpu.create_nd_desc op, but got: arith.constant}}
-    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
 // CHECK-LABEL: @set_anchor_layout_multiple
 func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 1c5dc8a2e0641..f4fa2962f7b53 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -79,104 +79,6 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
-// CHECK-LABEL: @set_desc_layout
-func.func @set_desc_layout(%arg0: memref<4096x4096xf16>) {
-  // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
-  // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false>
-  // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>>
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_desc_layout %{{.*}}
-    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_minimal
-func.func @set_desc_layout_minimal(%arg0: memref<4096x4096xf16>) {
-  // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
-  // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>>
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_desc_layout %{{.*}}
-    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_param
-func.func @set_desc_layout_param(%arg0: memref<4096x4096xf16>) {
-  // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
-  // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16]>>
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_desc_layout %{{.*}}
-    %layout0 = transform.param.constant 8 : i64 -> !transform.param<i64>
-    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [%layout0, 4] sg_data = [32, 32] inst_data = [8, 16] : (!transform.any_op, !transform.param<i64>) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_slice
-func.func @set_desc_layout_slice(%arg0: memref<4096xf16>) {
-  // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
-  // CHECK-SAME: #xegpu.slice<#xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>, dims = [0]>
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096xf16> -> !xegpu.tensor_desc<256xf16>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_desc_layout %{{.*}}
-    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] slice_dims = [0] : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @set_desc_layout_order
-func.func @set_desc_layout_order(%arg0: memref<4096x4096xf16>) {
-  // CHECK: %[[V0:.+]] = xegpu.create_nd_tdesc %arg0
-  // CHECK-SAME: #xegpu.block_tdesc_attr<boundary_check = false>
-  // CHECK-SAME: #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], inst_data = [8, 16], order = [1, 0]>
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.create_nd_tdesc"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // CHECK: transform.xegpu.set_desc_layout %{{.*}}
-    %1 = transform.xegpu.set_desc_layout %0 sg_layout = [8, 4] sg_data = [32, 32] inst_data = [8, 16] order = [1, 0] : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
 // CHECK-LABEL: @set_anchor_layout
 func.func @set_anchor_layout(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index e8e8b0c0077bd..4eff766b81bb7 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -31,79 +31,6 @@ def getLoadOp():
     # CHECK: transform.xegpu.get_load_op %
 
 
-@run
-def setDescLayoutMinimal():
-    sequence = transform.SequenceOp(
-        transform.FailurePropagationMode.Propagate,
-        [],
-        transform.OperationType.get("xegpu.create_nd_tdesc"),
-    )
-    with InsertionPoint(sequence.body):
-        xegpu.set_desc_layout(sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16])
-        transform.YieldOp()
-    # CHECK-LABEL: TEST: setDescLayoutMinimal
-    # CHECK: %0 = transform.xegpu.set_desc_layout %
-    # CHECK: sg_layout = [6, 4]
-    # CHECK: sg_data = [32, 16]
-
-
-@run
-def setDescLayoutInstData():
-    sequence = transform.SequenceOp(
-        transform.FailurePropagationMode.Propagate,
-        [],
-        transform.OperationType.get("xegpu.create_nd_tdesc"),
-    )
-    with InsertionPoint(sequence.body):
-        xegpu.set_desc_layout(
-            sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], inst_data=[8, 16]
-        )
-        transform.YieldOp()
-    # CHECK-LABEL: TEST: setDescLayoutInstData
-    # CHECK: %0 = transform.xegpu.set_desc_layout %
-    # CHECK: sg_layout = [6, 4]
-    # CHECK: sg_data = [32, 16]
-    # CHECK: inst_data = [8, 16]
-
-
-@run
-def setDescLayoutSlice():
-    sequence = transform.SequenceOp(
-        transform.FailurePropagationMode.Propagate,
-        [],
-        transform.OperationType.get("xegpu.create_nd_tdesc"),
-    )
-    with InsertionPoint(sequence.body):
-        xegpu.set_desc_layout(
-            sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], slice_dims=[0]
-        )
-        transform.YieldOp()
-    # CHECK-LABEL: TEST: setDescLayoutSlice
-    # CHECK: %0 = transform.xegpu.set_desc_layout %
-    # CHECK: sg_layout = [6, 4]
-    # CHECK: sg_data = [32, 16]
-    # CHECK: slice_dims = [0]
-
-
-@run
-def setDescLayoutOrder():
-    sequence = transform.SequenceOp(
-        transform.FailurePropagationMode.Propagate,
-        [],
-        transform.OperationType.get("xegpu.create_nd_tdesc"),
-    )
-    with InsertionPoint(sequence.body):
-        xegpu.set_desc_layout(
-            sequence.bodyTarget, sg_layout=[6, 4], sg_data=[32, 16], order=[0, 1]
-        )
-        transform.YieldOp()
-    # CHECK-LABEL: TEST: setDescLayoutOrder
-    # CHECK: %0 = transform.xegpu.set_desc_layout %
-    # CHECK: sg_layout = [6, 4]
-    # CHECK: sg_data = [32, 16]
-    # CHECK: order = [0, 1]
-
-
 @run
 def setAnchorLayout():
     sequence = transform.SequenceOp(

>From 3120e1955c3eb246164a59fce9de93b140b7b8e5 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 20:32:30 +0200
Subject: [PATCH 5/8] set_anchor_layout accepts multiple handles

---
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 58 +++++++++----------
 .../Dialect/XeGPU/transform-ops-invalid.mlir  | 19 ------
 mlir/test/Dialect/XeGPU/transform-ops.mlir    | 23 ++++++++
 3 files changed, 51 insertions(+), 49 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 5b66983774985..a0185dc7d65b0 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -224,14 +224,9 @@ transform::SetAnchorLayoutOp::apply(transform::TransformRewriter &rewriter,
                                     transform::TransformResults &results,
                                     transform::TransformState &state) {
   auto targetOps = state.getPayloadOps(getTarget());
-  if (!llvm::hasSingleElement(targetOps)) {
-    return emitDefiniteFailure() << "Requires exactly one targetOp handle (got "
-                                 << llvm::range_size(targetOps) << ")";
-  }
-  Operation *target = *targetOps.begin();
-
   int64_t index = getIndex();
 
+  // Construct layout attribute.
   xegpu::LayoutAttr layoutAttr = nullptr;
   auto status = getLayoutAttrFromOperands(
       getContext(), state, (*this), getMixedSgLayout(), getMixedSgData(),
@@ -247,31 +242,34 @@ transform::SetAnchorLayoutOp::apply(transform::TransformRewriter &rewriter,
         getContext(), layout, DenseI64ArrayAttr::get(getContext(), sliceDims));
   }
 
-  // Set layout attribute
-  if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
-    // dpas op is a special case where layout needs to be set for A, B, and C
-    if (index == 0)
-      dpasOp.getProperties().layout_a = layout;
-    else if (index == 1)
-      dpasOp.getProperties().layout_b = layout;
-    else if (index == 2)
-      dpasOp.getProperties().layout_cd = layout;
-    else {
-      auto diag = emitSilenceableFailure(getLoc())
-                  << "Invalid index for setting dpas op layout: " << index;
-      diag.attachNote(target->getLoc()) << "target op";
-      return diag;
-    }
-  } else {
-    // op's anchor layout.
-    auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(target);
-    if (!anchorOp) {
-      auto diag = emitSilenceableFailure(getLoc())
-                  << "Cannot set anchor layout to op: " << target->getName();
-      diag.attachNote(target->getLoc()) << "target op";
-      return diag;
+  // Apply the layout to all target ops.
+  for (Operation *target : targetOps) {
+    // Set layout attribute
+    if (auto dpasOp = dyn_cast<xegpu::DpasOp>(target)) {
+      // dpas op is a special case where layout needs to be set for A, B, and C
+      if (index == 0)
+        dpasOp.getProperties().layout_a = layout;
+      else if (index == 1)
+        dpasOp.getProperties().layout_b = layout;
+      else if (index == 2)
+        dpasOp.getProperties().layout_cd = layout;
+      else {
+        auto diag = emitSilenceableFailure(getLoc())
+                    << "Invalid index for setting dpas op layout: " << index;
+        diag.attachNote(target->getLoc()) << "target op";
+        return diag;
+      }
+    } else {
+      // op's anchor layout.
+      auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(target);
+      if (!anchorOp) {
+        auto diag = emitSilenceableFailure(getLoc())
+                    << "Cannot set anchor layout to op: " << target->getName();
+        diag.attachNote(target->getLoc()) << "target op";
+        return diag;
+      }
+      anchorOp.setAnchorLayout(layout);
     }
-    anchorOp.setAnchorLayout(layout);
   }
   return DiagnosedSilenceableFailure::success();
 }
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index c6f4dc5fb6bf8..5c604f1ba12cf 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -1,24 +1,5 @@
 // RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics
 
-// CHECK-LABEL: @set_anchor_layout_multiple
-func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
-  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
-  %1 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  %2 = xegpu.load_nd %0[0, 0]  : !xegpu.tensor_desc<256x32xf16> -> vector<256x32xf16>
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["xegpu.load_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    // expected-error@below {{Requires exactly one targetOp handle (got 2)}}
-    transform.xegpu.set_anchor_layout %0 sg_layout = [8, 4] sg_data = [32, 64] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
 // CHECK-LABEL: @set_anchor_layout_not_anchor_op
 func.func @set_anchor_layout_not_anchor_op(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index f4fa2962f7b53..0ad598a6bcd2b 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -99,6 +99,29 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// CHECK-LABEL: @set_anchor_layout_multiple
+func.func @set_anchor_layout_multiple(%arg0: memref<4096x4096xf16>) {
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>
+  // CHECK: xegpu.prefetch_nd %0[0, 0]
+  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
+  // CHECK: xegpu.prefetch_nd %0[16, 0]
+  // CHECK-SAME: <{layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 64], inst_data = [8, 16]>}>
+  xegpu.prefetch_nd %0[0, 0] : !xegpu.tensor_desc<256x32xf16>
+  xegpu.prefetch_nd %0[16, 0] : !xegpu.tensor_desc<256x32xf16>
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["xegpu.prefetch_nd"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    // CHECK: transform.xegpu.set_anchor_layout %{{.*}}
+    transform.xegpu.set_anchor_layout %0 index = 0 sg_layout = [8, 4] sg_data = [32, 64] inst_data = [8, 16] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // CHECK-LABEL: @set_anchor_layout_param
 func.func @set_anchor_layout_param(%arg0: memref<4096x4096xf16>) {
   %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf16> -> !xegpu.tensor_desc<256x32xf16>

>From d979b8572242cf2da87f5f6166d5abb785ab28b7 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 21:04:47 +0200
Subject: [PATCH 6/8] insert_prefetch op takes xegpu.load_nd op handle instead
 of value

---
 .../XeGPU/TransformOps/XeGPUTransformOps.td   | 14 ++++----
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 24 +++++++------
 mlir/python/mlir/dialects/transform/xegpu.py  |  4 +--
 .../Dialect/XeGPU/transform-ops-invalid.mlir  |  3 +-
 mlir/test/Dialect/XeGPU/transform-ops.mlir    |  6 ++--
 .../python/dialects/transform_xegpu_ext.py    | 36 ++++++-------------
 6 files changed, 38 insertions(+), 49 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index d5b699d49c7e1..2b11d8f8884ed 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -150,17 +150,15 @@ def InsertPrefetchOp : Op<Transform_Dialect, "xegpu.insert_prefetch", [
   TransformOpInterface
 ]> {
 
-  let summary = "Adds xegpu prefetch ops to matmul operand tiles.";
+  let summary = "Adds xegpu prefetch ops to a load op.";
   let description = [{
-    Given a target value (e.g., `vector`) residing in a `scf.for` loop, this
-    transform finds the corresponding `xegpu.load_nd` op and inserts
-    `xegpu.prefetch_nd` operations for the tile. The load op must reside within
-    the `scf.for` loop. Number of prefetch steps is set by the `nb_prefetch`
-    argument (default value is 1). Returns a handle to the created
-    `xegpu.create_nd_desc` op.
+    Inserts `xegpu.prefetch_nd` operations for the given `xegpu.load_nd` op.
+    The load op must reside within the `scf.for` loop. Number of prefetch steps
+    is set by the `nb_prefetch` argument (default value is 1). Returns a handle
+    to the created `xegpu.create_nd_desc` op.
   }];
 
-  let arguments = (ins TransformValueHandleTypeInterface:$target,
+  let arguments = (ins TransformHandleTypeInterface:$target,
                    Optional<TransformAnyParamTypeOrAnyHandle>:$dynamic_nb_prefetch,
                    DefaultValuedOptionalAttr<I64Attr, "1">:$static_nb_prefetch
                    );
diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index a0185dc7d65b0..608ed5c555420 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -350,12 +350,12 @@ DiagnosedSilenceableFailure
 transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
                                    transform::TransformResults &results,
                                    transform::TransformState &state) {
-  auto targetValues = state.getPayloadValues(getTarget());
-  if (!llvm::hasSingleElement(targetValues))
+  auto targetOps = state.getPayloadOps(getTarget());
+  if (!llvm::hasSingleElement(targetOps))
     return emitDefiniteFailure()
-           << "requires exactly one target value handle (got "
-           << llvm::range_size(targetValues) << ")";
-  auto value = *targetValues.begin();
+           << "requires exactly one target op handle (got "
+           << llvm::range_size(targetOps) << ")";
+  auto target = *targetOps.begin();
 
   int64_t nbPrefetch = getStaticNbPrefetch();
   if (getDynamicNbPrefetch()) {
@@ -374,11 +374,13 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
     return emitSilenceableFailure(getLoc())
            << "nb_prefetch must be a positive integer.";
 
-  // Find load operation of the operand.
-  auto maybeLoadOp = findProducerOfType<xegpu::LoadNdOp>(value);
-  if (!maybeLoadOp)
-    return emitSilenceableFailure(getLoc()) << "Could not find load op.";
-  auto loadOp = *maybeLoadOp;
+  // Cast target to load op.
+  auto maybeLoadOp = dyn_cast<xegpu::LoadNdOp>(target);
+  if (!maybeLoadOp) {
+    return emitSilenceableFailure(getLoc()) << "Expected xegpu.load_nd op, got "
+                                          << target->getName();
+  }
+  auto loadOp = maybeLoadOp;
   if (loadOp.getMixedOffsets().size() == 0) {
     auto diag = emitSilenceableFailure(getLoc())
                 << "Load op must have offsets.";
@@ -396,7 +398,7 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
   }
 
   // Find descriptor op.
-  auto maybeDescOp = findProducerOfType<xegpu::CreateNdDescOp>(value);
+  auto maybeDescOp = findProducerOfType<xegpu::CreateNdDescOp>(loadOp.getResult());
   if (!maybeDescOp)
     return emitSilenceableFailure(getLoc()) << "Could not find descriptor op.";
   auto descOp = *maybeDescOp;
diff --git a/mlir/python/mlir/dialects/transform/xegpu.py b/mlir/python/mlir/dialects/transform/xegpu.py
index 00ffc65900e33..6e27e5c8ecfa6 100644
--- a/mlir/python/mlir/dialects/transform/xegpu.py
+++ b/mlir/python/mlir/dialects/transform/xegpu.py
@@ -168,7 +168,7 @@ class InsertPrefetchOp(InsertPrefetchOp):
 
     def __init__(
         self,
-        target: Value,
+        target: Union[Operation, Value],
         *,
         nb_prefetch: Optional[MixedInt] = 1,
         loc=None,
@@ -194,7 +194,7 @@ def __init__(
 
 
 def insert_prefetch(
-    target: Value,
+    target: Union[Operation, Value],
     *,
     nb_prefetch: Optional[MixedInt] = 1,
     loc=None,
diff --git a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
index 5c604f1ba12cf..ba259f311d76e 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops-invalid.mlir
@@ -95,8 +95,9 @@ module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
     %0 = transform.structured.match ops{["xegpu.dpas"]} in %arg0 : (!transform.any_op) -> !transform.any_op
     %1 = transform.get_operand %0[2] : (!transform.any_op) -> !transform.any_value
+    %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
     // expected-error@below {{Load op is not contained in a scf.for loop.}}
-    %2 = transform.xegpu.insert_prefetch %1 nb_prefetch = 1 : (!transform.any_value) -> !transform.any_op
+    %3 = transform.xegpu.insert_prefetch %2 nb_prefetch = 1 : (!transform.any_op) -> !transform.any_op
     transform.yield
   }
 }
diff --git a/mlir/test/Dialect/XeGPU/transform-ops.mlir b/mlir/test/Dialect/XeGPU/transform-ops.mlir
index 0ad598a6bcd2b..acba80d870253 100644
--- a/mlir/test/Dialect/XeGPU/transform-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/transform-ops.mlir
@@ -372,8 +372,9 @@ module attributes {transform.with_named_sequence} {
     %func = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
     %0 = transform.structured.match ops{["xegpu.dpas"]} in %func : (!transform.any_op) -> !transform.any_op
     %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+    %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
     // CHECK: transform.xegpu.insert_prefetch %{{.*}}
-    %2 = transform.xegpu.insert_prefetch %1 nb_prefetch = 1 : (!transform.any_value) -> !transform.any_op
+    %3 = transform.xegpu.insert_prefetch %2 nb_prefetch = 1 : (!transform.any_op) -> !transform.any_op
     transform.apply_patterns to %func {
       transform.apply_patterns.canonicalization
     } : !transform.any_op
@@ -419,9 +420,10 @@ module attributes {transform.with_named_sequence} {
     %func = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op
     %0 = transform.structured.match ops{["xegpu.dpas"]} in %func : (!transform.any_op) -> !transform.any_op
     %1 = transform.get_operand %0[0] : (!transform.any_op) -> !transform.any_value
+    %2 = transform.xegpu.get_load_op %1 : (!transform.any_value) -> !transform.any_op
     %nb = transform.param.constant 2 : i64 -> !transform.param<i64>
     // CHECK: transform.xegpu.insert_prefetch %{{.*}}
-    %2 = transform.xegpu.insert_prefetch %1 nb_prefetch = %nb :  (!transform.any_value, !transform.param<i64>) -> !transform.any_op
+    %3 = transform.xegpu.insert_prefetch %2 nb_prefetch = %nb :  (!transform.any_op, !transform.param<i64>) -> !transform.any_op
     transform.apply_patterns to %func {
       transform.apply_patterns.canonicalization
     } : !transform.any_op
diff --git a/mlir/test/python/dialects/transform_xegpu_ext.py b/mlir/test/python/dialects/transform_xegpu_ext.py
index 4eff766b81bb7..5d5db1919af14 100644
--- a/mlir/test/python/dialects/transform_xegpu_ext.py
+++ b/mlir/test/python/dialects/transform_xegpu_ext.py
@@ -144,21 +144,17 @@ def setGPULaunchThreadsOp():
 
 
 @run
-def insertPrefetch0():
+def insertPrefetch():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
-        transform.OperationType.get("xegpu.dpas"),
+        transform.OperationType.get("xegpu.load_nd"),
     )
     with InsertionPoint(sequence.body):
-        operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
-        xegpu.insert_prefetch(
-            operand,
-        )
+        xegpu.insert_prefetch(sequence.bodyTarget)
         transform.YieldOp()
-    # CHECK-LABEL: TEST: insertPrefetch0
-    # CHECK: %[[OPR:.*]] = get_operand
-    # CHECK: transform.xegpu.insert_prefetch %[[OPR]]
+    # CHECK-LABEL: TEST: insertPrefetch
+    # CHECK: transform.xegpu.insert_prefetch
 
 
 @run
@@ -166,18 +162,13 @@ def insertPrefetchNbPrefetch():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
-        transform.OperationType.get("xegpu.dpas"),
+        transform.OperationType.get("xegpu.load_nd"),
     )
     with InsertionPoint(sequence.body):
-        operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
-        xegpu.insert_prefetch(
-            operand,
-            nb_prefetch=2,
-        )
+        xegpu.insert_prefetch(sequence.bodyTarget, nb_prefetch=2)
         transform.YieldOp()
     # CHECK-LABEL: TEST: insertPrefetchNbPrefetch
-    # CHECK: %[[OPR:.*]] = get_operand
-    # CHECK: transform.xegpu.insert_prefetch %[[OPR]]
+    # CHECK: transform.xegpu.insert_prefetch
     # CHECK-SAME: nb_prefetch = 2
 
 
@@ -186,25 +177,20 @@ def insertPrefetchNbPrefetchParam():
     sequence = transform.SequenceOp(
         transform.FailurePropagationMode.Propagate,
         [],
-        transform.OperationType.get("xegpu.dpas"),
+        transform.OperationType.get("xegpu.load_nd"),
     )
     with InsertionPoint(sequence.body):
-        operand = transform.GetOperandOp(AnyValueType.get(), sequence.bodyTarget, [0])
         int32_t = IntegerType.get_signless(32)
         param_int32_t = transform.ParamType.get(int32_t)
         nb_param = transform.ParamConstantOp(
             param_int32_t,
             IntegerAttr.get(int32_t, 2),
         )
-        xegpu.insert_prefetch(
-            operand,
-            nb_prefetch=nb_param,
-        )
+        xegpu.insert_prefetch(sequence.bodyTarget, nb_prefetch=nb_param)
         transform.YieldOp()
     # CHECK-LABEL: TEST: insertPrefetchNbPrefetchParam
-    # CHECK: %[[OPR:.*]] = get_operand
     # CHECK: %[[PARAM_OP:.*]] = transform.param.constant 2
-    # CHECK: transform.xegpu.insert_prefetch %[[OPR]]
+    # CHECK: transform.xegpu.insert_prefetch
     # CHECK-SAME: nb_prefetch = %[[PARAM_OP]]
 
 

>From 5ce1d3fb59dfe46ae0893fda94fd0c5a9ed9aab7 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Thu, 19 Mar 2026 21:32:13 +0200
Subject: [PATCH 7/8] code formatting

---
 .../XeGPU/TransformOps/XeGPUTransformOps.cpp  | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
index 608ed5c555420..153ef5b500a1b 100644
--- a/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/TransformOps/XeGPUTransformOps.cpp
@@ -167,8 +167,8 @@ getLayoutAttrFromOperands(MLIRContext *ctx, transform::TransformState &state,
 
 DiagnosedSilenceableFailure
 transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
-                              transform::TransformResults &results,
-                              transform::TransformState &state) {
+                            transform::TransformResults &results,
+                            transform::TransformState &state) {
   auto targetValues = state.getPayloadValues(getTarget());
   if (!llvm::hasSingleElement(targetValues)) {
     return emitDefiniteFailure()
@@ -176,18 +176,20 @@ transform::GetLoadOp::apply(transform::TransformRewriter &rewriter,
            << llvm::range_size(targetValues) << ")";
   }
 
-  Operation* loadOp = nullptr;
+  Operation *loadOp = nullptr;
   auto maybeLoadNdOp =
       findProducerOfType<xegpu::LoadNdOp>(*targetValues.begin());
   if (maybeLoadNdOp) {
     loadOp = maybeLoadNdOp->getOperation();
   } else {
-    auto maybeLoadOp = findProducerOfType<xegpu::LoadGatherOp>(*targetValues.begin());
+    auto maybeLoadOp =
+        findProducerOfType<xegpu::LoadGatherOp>(*targetValues.begin());
     if (maybeLoadOp) {
       loadOp = maybeLoadOp->getOperation();
     } else {
       return emitSilenceableFailure(getLoc())
-            << "Could not find a matching xegpu.load_nd or xegpu.load op when walking the "
+             << "Could not find a matching xegpu.load_nd or xegpu.load op when "
+                "walking the "
                 "producer chain of the first operand.";
     }
   }
@@ -377,8 +379,8 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
   // Cast target to load op.
   auto maybeLoadOp = dyn_cast<xegpu::LoadNdOp>(target);
   if (!maybeLoadOp) {
-    return emitSilenceableFailure(getLoc()) << "Expected xegpu.load_nd op, got "
-                                          << target->getName();
+    return emitSilenceableFailure(getLoc())
+           << "Expected xegpu.load_nd op, got " << target->getName();
   }
   auto loadOp = maybeLoadOp;
   if (loadOp.getMixedOffsets().size() == 0) {
@@ -398,7 +400,8 @@ transform::InsertPrefetchOp::apply(transform::TransformRewriter &rewriter,
   }
 
   // Find descriptor op.
-  auto maybeDescOp = findProducerOfType<xegpu::CreateNdDescOp>(loadOp.getResult());
+  auto maybeDescOp =
+      findProducerOfType<xegpu::CreateNdDescOp>(loadOp.getResult());
   if (!maybeDescOp)
     return emitSilenceableFailure(getLoc()) << "Could not find descriptor op.";
   auto descOp = *maybeDescOp;

>From 5916543120da6b44e2d754c3d1e79f6ae5d17b19 Mon Sep 17 00:00:00 2001
From: Tuomas Karna <tuomas.karna at intel.com>
Date: Fri, 20 Mar 2026 11:27:13 +0200
Subject: [PATCH 8/8] update convert_layout docstring

---
 .../mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
index 2b11d8f8884ed..40b9136874e7c 100644
--- a/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/TransformOps/XeGPUTransformOps.td
@@ -196,9 +196,9 @@ def ConvertLayoutOp : Op<Transform_Dialect, "xegpu.convert_layout", [
   let summary = "Convert xegpu.layout attribute for a value.";
   let description = [{
     Adds an `xegpu.convert_layout` op to convert the `xegpu.layout` attribute
-    of a value. The input and target layouts are defined by the `*sg_layout`,
-    `*sg_data` and optional `*inst_data` attributes. Returns a handle to the
-    emitted `xegpu.convert_layout` op.
+    of a value before its first use. The input and target layouts are defined
+    by the `*sg_layout`, `*sg_data` and optional `*inst_data` and `*order`
+    attributes. Returns a handle to the emitted `xegpu.convert_layout` op.
   }];
 
   let arguments = (ins TransformValueHandleTypeInterface:$target,