[Mlir-commits] [mlir] [MLIR][XeGPU] bug fix: removing slice layout at the pass end (PR #172589)

Tue Dec 23 09:22:29 PST 2025

https://github.com/dchigarev updated https://github.com/llvm/llvm-project/pull/172589

>From 6136cc07e10c74fcd62d76effd9896038459cdf3 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 17 Dec 2025 03:29:48 +0000
Subject: [PATCH 1/3] fix removing slice layout at the pass end

---
 mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp         | 4 ++--
 mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index ec5feb8bc8c4a..950ad0c937b9d 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -413,14 +413,14 @@ void XeGPUBlockingPass::runOnOperation() {
     // Remove the layout attributes cached per operands.
     for (OpOperand &opr : op->getOpOperands()) {
       std::string name = xegpu::getLayoutName(opr);
-      if (op->hasAttrOfType<xegpu::LayoutAttr>(name))
+      if (op->hasAttrOfType<xegpu::DistributeLayoutAttr>(name))
         op->removeAttr(name);
     }
 
     // Update the layout attributes per result.
     for (OpResult result : op->getOpResults()) {
       std::string name = xegpu::getLayoutName(result);
-      if (auto layout = op->getAttrOfType<xegpu::LayoutAttr>(name)) {
+      if (auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
         op->removeAttr(name);
         if (!isa<LoopLikeOpInterface>(op))
           xegpu::setDistributeLayoutAttr(result, layout.dropInstData());
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 95e27e46d90ab..56ed2ce5417f9 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -1597,7 +1597,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() {
   getOperation()->walk([](Operation *op) {
     for (OpResult result : op->getOpResults()) {
       std::string name = xegpu::getLayoutName(result);
-      if (auto layout = op->getAttrOfType<xegpu::LayoutAttr>(name)) {
+      if (auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
         op->removeAttr(name);
         if (!isa<scf::IfOp, scf::ForOp, scf::WhileOp, scf::ConditionOp>(op)) {
           if (auto newLayout = layout.dropSgLayoutAndData())

>From f54891e483096793c3b0353837a90d01d4ec999e Mon Sep 17 00:00:00 2001
From: Dmitry Chigarev <dmitry.chigarev at intel.com>
Date: Tue, 23 Dec 2025 17:05:15 +0000
Subject: [PATCH 2/3] Ensure anchor layouts are updated after
 xegpu-blocking/unroll/wg-to-sg passes

Signed-off-by: Dmitry Chigarev <dmitry.chigarev at intel.com>
---
 .../mlir/Dialect/XeGPU/Utils/XeGPUUtils.h     |  6 ++++
 .../Dialect/XeGPU/Transforms/XeGPUUnroll.cpp  |  8 ++---
 .../Transforms/XeGPUWgToSgDistribute.cpp      | 15 ++------
 mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp   | 36 +++++++++++++++++++
 mlir/test/Dialect/XeGPU/xegpu-blocking.mlir   |  6 ++--
 mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir   |  4 +--
 6 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index 46d52516cbc15..181d9bc02c353 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -147,6 +147,12 @@ void removeLayoutAttr(const T &operandOrResult);
 /// applied recursively to the contained operations
 void removeLayoutAttrs(Operation *op);
 
+SmallVector<NamedAttribute>
+dropSgLayoutAndDataOnAttrs(ArrayRef<NamedAttribute> attrs);
+
+SmallVector<NamedAttribute>
+dropInstDataOnAttrs(ArrayRef<NamedAttribute> attrs);
+
 /// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult
 /// user should use setAnchorLayout instead
 void setDistributeLayoutAttr(const OpResult &Result,
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index af63f09cd5e4a..5cf9edf3f6ef9 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -253,7 +253,7 @@ struct UnrollPrefetchNdOp : public UnrollPattern<xegpu::PrefetchNdOp> {
     if (!hasOffsets) {
       for (auto t : convertedTdesc)
         xegpu::PrefetchNdOp::create(rewriter, loc, TypeRange(), t,
-                                    op->getAttrs());
+                                    xegpu::dropInstDataOnAttrs(op->getAttrs()));
     } else {
       auto createPrefetch = [&](SmallVector<OpFoldResult> offsets) -> Value {
         xegpu::PrefetchNdOp::create(rewriter, loc, convertedTdesc[0], offsets,
@@ -304,7 +304,7 @@ struct UnrollLoadNdOp : public UnrollPattern<xegpu::LoadNdOp> {
     if (!hasOffsets) {
       for (auto t : convertedTdescs) {
         auto newOp = xegpu::LoadNdOp::create(rewriter, loc, newValueTy, t,
-                                             op->getAttrs());
+                                             xegpu::dropInstDataOnAttrs(op->getAttrs()));
         newOps.push_back(newOp);
       }
     } else {
@@ -463,7 +463,7 @@ struct UnrollDpasOp : public UnrollPattern<xegpu::DpasOp> {
             operands.push_back(tmpC);
 
           tmpC = xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
-                                       op->getAttrs());
+                                       xegpu::dropInstDataOnAttrs(op->getAttrs()));
         }
         newOps.push_back(tmpC);
       }
@@ -825,7 +825,7 @@ struct UnrollPrefetchOp : public UnrollPattern<xegpu::PrefetchOp> {
         op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
 
     for (auto t : convertedTdesc)
-      xegpu::PrefetchOp::create(rewriter, loc, TypeRange(), t, op->getAttrs());
+      xegpu::PrefetchOp::create(rewriter, loc, TypeRange(), t, xegpu::dropInstDataOnAttrs(op->getAttrs()));
 
     rewriter.eraseOp(op);
     return success();
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 2d42a51f0e642..d3540367be31e 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -277,7 +277,7 @@ struct WgToSgLoadNdOp : public OpConversionPattern<xegpu::LoadNdOp> {
       ArrayRef<int64_t> srcShape = tdescTy.getShape();
       VectorType newResTy = VectorType::get(srcShape, tdescTy.getElementType());
       auto newLoadOp = xegpu::LoadNdOp::create(rewriter, op.getLoc(), newResTy,
-                                               src, op->getAttrs());
+                                               src, xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
       newLoadOps.push_back(newLoadOp);
     }
     rewriter.replaceOpWithMultiple(op, {newLoadOps});
@@ -474,7 +474,7 @@ struct WgToSgPrefetchNdOp : public OpConversionPattern<xegpu::PrefetchNdOp> {
 
     for (auto src : adaptor.getTensorDesc())
       xegpu::PrefetchNdOp::create(rewriter, op.getLoc(), TypeRange(), src,
-                                  op->getAttrs());
+                                  xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
     rewriter.eraseOp(op);
     return success();
   }
@@ -563,16 +563,7 @@ struct WgToSgElementwiseOp : public ConversionPattern {
       state.addTypes(newResultType);
       // Copy all attributes, but update "layout_result_0" to drop
       // sgLayout/sgData
-      for (auto attr : op->getAttrs()) {
-        if (auto layout =
-                dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
-          if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
-              !layout.getEffectiveInstDataAsInt().empty())
-            state.addAttribute(attr.getName(), layout.dropSgLayoutAndData());
-        } else {
-          state.addAttribute(attr.getName(), attr.getValue());
-        }
-      }
+      state.addAttributes(xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
       Operation *newOp = rewriter.create(state);
       newResults.push_back(newOp->getResult(0));
     }
diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
index d3906e37ffbf1..51783b41c4c96 100644
--- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
+++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp
@@ -380,6 +380,42 @@ void xegpu::removeLayoutAttr(const T &operandOrResult) {
     owner->removeAttr(name);
 }
 
+SmallVector<NamedAttribute>
+xegpu::dropSgLayoutAndDataOnAttrs(ArrayRef<NamedAttribute> attrs) {
+  SmallVector<NamedAttribute> out;
+  out.reserve(attrs.size());
+
+  for (auto attr : attrs) {
+    if (auto dist = dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
+      auto newLayout = dist.dropSgLayoutAndData();
+      if (newLayout)
+        out.emplace_back(attr.getName(), newLayout);
+    } else {
+      out.push_back(attr);
+    }
+  }
+
+  return out;
+}
+
+SmallVector<NamedAttribute>
+xegpu::dropInstDataOnAttrs(ArrayRef<NamedAttribute> attrs) {
+  SmallVector<NamedAttribute> out;
+  out.reserve(attrs.size());
+
+  for (auto attr : attrs) {
+    if (auto dist = dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
+      auto newLayout = dist.dropInstData();
+      if (newLayout)
+        out.emplace_back(attr.getName(), newLayout);
+    } else {
+      out.push_back(attr);
+    }
+  }
+
+  return out;
+}
+
 // Explicit instantiation for OpResult
 template void
 xegpu::removeLayoutAttr<mlir::OpResult>(const mlir::OpResult &result);
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index 9f9edcd416ddf..c2a4cdb995f55 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -405,7 +405,7 @@ gpu.module @test_kernel   {
   // CHECK-LABEL: test_prefetch_load_store_update_chunk
   // CHECK-SAME: [[arg0:%.+]]: ui64
   // CHECK-COUNT-4: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>>
-  // CHECK-COUNT-4: xegpu.prefetch {{.*}}  <{layout = #xegpu.layout<inst_data = [16, 2]>}> : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>>
+  // CHECK-COUNT-4: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>>
   // CHECK-COUNT-4: xegpu.update_offset {{.*}} : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>>, vector<16xindex>
   // CHECK-COUNT-4: xegpu.load  {{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>>, vector<16xi1> -> vector<16x2xf32> 
   // CHECK-COUNT-4: xegpu.store  {{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<16x2xf32>, !xegpu.tensor_desc<16x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>>, vector<16xi1>
@@ -513,8 +513,8 @@ gpu.module @test_kernel {
   //CHECK: [[c0:%.+]] = arith.constant 0 : index
   //CHECK: [[a:%.+]] = xegpu.create_nd_tdesc [[arg0]][[[c0]], [[c0]]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>>
   //CHECK: [[b:%.+]] = xegpu.create_nd_tdesc [[arg1]][[[c0]], [[c0]]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>>
-  //CHECK: [[load_a:%.+]] = xegpu.load_nd [[a]] <{layout = #xegpu.layout<inst_data = [16, 16], lane_layout = [1, 16], lane_data = [16, 1]>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>> -> vector<16x16xf16>
-  //CHECK: [[load_b:%.+]] = xegpu.load_nd [[b]] <{layout = #xegpu.layout<inst_data = [16, 16], lane_layout = [1, 16], lane_data = [16, 1]>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>> -> vector<16x16xf16>
+  //CHECK: [[load_a:%.+]] = xegpu.load_nd [[a]] <{layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>> -> vector<16x16xf16>
+  //CHECK: [[load_b:%.+]] = xegpu.load_nd [[b]] <{layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>> -> vector<16x16xf16>
   //CHECK: [[cvt:%.+]] = xegpu.convert_layout [[load_a]] <{input_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [16, 1]>, target_layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [8, 1]>}> : vector<16x16xf16>
   //CHECK: [[a0:%.+]] = vector.extract_strided_slice [[cvt]] {offsets = [0, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16>
   //CHECK: [[a1:%.+]] = vector.extract_strided_slice [[cvt]] {offsets = [8, 0], sizes = [8, 16], strides = [1, 1]} : vector<16x16xf16> to vector<8x16xf16>
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
index 50081ed34fe78..4f29a686d301f 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
@@ -203,8 +203,8 @@ gpu.module @test_1_1_assignment {
     // CHECK: %[[SCF:.*]]:3 = scf.for %[[ARG3:.*]] = %[[C0]] to %[[C1024]] step %[[C128]]
     // CHECK-SAME: iter_args(%[[ARG4:.*]] = {{.*}}, %[[ARG5:.*]] = {{.*}}, %[[ARG6:.*]] = {{.*}}) ->
     // CHECK-SAME: (!xegpu.tensor_desc<16x128xf16>, !xegpu.tensor_desc<128x16xf16>, vector<16x16xf32>)
-    // CHECK: %[[A:.*]] = xegpu.load_nd %[[ARG4]] <{layout = #xegpu.layout<sg_layout = [8, 8], sg_data = [16, 128]>}> : !xegpu.tensor_desc<16x128xf16> -> vector<16x128xf16>
-    // CHECK: %[[B:.*]] = xegpu.load_nd %[[ARG5]] <{layout = #xegpu.layout<sg_layout = [8, 8], sg_data = [128, 16]>}> : !xegpu.tensor_desc<128x16xf16> -> vector<128x16xf16>
+    // CHECK: %[[A:.*]] = xegpu.load_nd %[[ARG4]] : !xegpu.tensor_desc<16x128xf16> -> vector<16x128xf16>
+    // CHECK: %[[B:.*]] = xegpu.load_nd %[[ARG5]] : !xegpu.tensor_desc<128x16xf16> -> vector<128x16xf16>
     // CHECK: %[[C:.*]] = xegpu.dpas %[[A]], %[[B]], %[[ARG6]] : vector<16x128xf16>, vector<128x16xf16>, vector<16x16xf32> -> vector<16x16xf32>
     // CHECK: %[[AT:.*]] = xegpu.update_nd_offset %[[ARG4]], [%[[C0]], %[[C128]]] : !xegpu.tensor_desc<16x128xf16>
     // CHECK: %[[BT:.*]] = xegpu.update_nd_offset %[[ARG5]], [%[[C128]], %[[C0]]] : !xegpu.tensor_desc<128x16xf16>

>From 32c34cf02239ab7e064eb01413aecd0b33f86817 Mon Sep 17 00:00:00 2001
From: Dmitry Chigarev <dmitry.chigarev at intel.com>
Date: Tue, 23 Dec 2025 17:16:33 +0000
Subject: [PATCH 3/3] clang-format

Signed-off-by: Dmitry Chigarev <dmitry.chigarev at intel.com>
---
 mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h  |  3 +--
 mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp   | 13 ++++++++-----
 .../XeGPU/Transforms/XeGPUWgToSgDistribute.cpp      | 10 ++++++----
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
index 181d9bc02c353..b720ac9e387cf 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h
@@ -150,8 +150,7 @@ void removeLayoutAttrs(Operation *op);
 SmallVector<NamedAttribute>
 dropSgLayoutAndDataOnAttrs(ArrayRef<NamedAttribute> attrs);
 
-SmallVector<NamedAttribute>
-dropInstDataOnAttrs(ArrayRef<NamedAttribute> attrs);
+SmallVector<NamedAttribute> dropInstDataOnAttrs(ArrayRef<NamedAttribute> attrs);
 
 /// [to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult
 /// user should use setAnchorLayout instead
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index 5cf9edf3f6ef9..8f4e2bb0451d8 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -303,8 +303,9 @@ struct UnrollLoadNdOp : public UnrollPattern<xegpu::LoadNdOp> {
 
     if (!hasOffsets) {
       for (auto t : convertedTdescs) {
-        auto newOp = xegpu::LoadNdOp::create(rewriter, loc, newValueTy, t,
-                                             xegpu::dropInstDataOnAttrs(op->getAttrs()));
+        auto newOp =
+            xegpu::LoadNdOp::create(rewriter, loc, newValueTy, t,
+                                    xegpu::dropInstDataOnAttrs(op->getAttrs()));
         newOps.push_back(newOp);
       }
     } else {
@@ -462,8 +463,9 @@ struct UnrollDpasOp : public UnrollPattern<xegpu::DpasOp> {
           if (tmpC)
             operands.push_back(tmpC);
 
-          tmpC = xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
-                                       xegpu::dropInstDataOnAttrs(op->getAttrs()));
+          tmpC =
+              xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
+                                    xegpu::dropInstDataOnAttrs(op->getAttrs()));
         }
         newOps.push_back(tmpC);
       }
@@ -825,7 +827,8 @@ struct UnrollPrefetchOp : public UnrollPattern<xegpu::PrefetchOp> {
         op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
 
     for (auto t : convertedTdesc)
-      xegpu::PrefetchOp::create(rewriter, loc, TypeRange(), t, xegpu::dropInstDataOnAttrs(op->getAttrs()));
+      xegpu::PrefetchOp::create(rewriter, loc, TypeRange(), t,
+                                xegpu::dropInstDataOnAttrs(op->getAttrs()));
 
     rewriter.eraseOp(op);
     return success();
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index d3540367be31e..db31d93da663c 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -276,8 +276,9 @@ struct WgToSgLoadNdOp : public OpConversionPattern<xegpu::LoadNdOp> {
           dyn_cast<xegpu::TensorDescType>(src.getType());
       ArrayRef<int64_t> srcShape = tdescTy.getShape();
       VectorType newResTy = VectorType::get(srcShape, tdescTy.getElementType());
-      auto newLoadOp = xegpu::LoadNdOp::create(rewriter, op.getLoc(), newResTy,
-                                               src, xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
+      auto newLoadOp = xegpu::LoadNdOp::create(
+          rewriter, op.getLoc(), newResTy, src,
+          xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
       newLoadOps.push_back(newLoadOp);
     }
     rewriter.replaceOpWithMultiple(op, {newLoadOps});
@@ -473,8 +474,9 @@ struct WgToSgPrefetchNdOp : public OpConversionPattern<xegpu::PrefetchNdOp> {
       return failure();
 
     for (auto src : adaptor.getTensorDesc())
-      xegpu::PrefetchNdOp::create(rewriter, op.getLoc(), TypeRange(), src,
-                                  xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
+      xegpu::PrefetchNdOp::create(
+          rewriter, op.getLoc(), TypeRange(), src,
+          xegpu::dropSgLayoutAndDataOnAttrs(op->getAttrs()));
     rewriter.eraseOp(op);
     return success();
   }