[Mlir-commits] [mlir] [mlir][xegpu] Add initial support for layout conflict handling. (PR #173090)

Wed Jan 28 08:24:15 PST 2026

https://github.com/charithaintc updated https://github.com/llvm/llvm-project/pull/173090

>From a646df0e59d11dab90525b7ae4cfc87769e4aebd Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 16 Dec 2025 19:37:57 +0000
Subject: [PATCH 1/8] save work

---
 .../Dialect/XeGPU/Transforms/Transforms.h     |  8 ++
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 74 +++++++++++--------
 .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 29 ++++++++
 3 files changed, 81 insertions(+), 30 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
index 1776a209d0bf1..b97a7e4aa3211 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
@@ -9,6 +9,8 @@
 #ifndef MLIR_DIALECT_XEGPU_TRANSFORMS_TRANSFORMS_H
 #define MLIR_DIALECT_XEGPU_TRANSFORMS_TRANSFORMS_H
 
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/IR/Builders.h"
 #include "mlir/IR/Operation.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/LogicalResult.h"
@@ -91,6 +93,12 @@ void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns);
 void populateXeGPUUnrollPatterns(RewritePatternSet &patterns,
                                  const UnrollOptions &options);
 
+enum class LayoutKind { Lane, InstData };
+LogicalResult propagateLayouts(OpBuilder &builder, Operation *target,
+                               LayoutKind layoutKind, bool printOnly = false);
+
+LogicalResult resolveLayoutConflicts(OpBuilder &builder, Operation *target);
+
 } // namespace xegpu
 } // namespace mlir
 
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index dc9eb96c169b4..eae2cdfde0e32 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/Dialect/XeGPU/Transforms/Passes.h"
+#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
 #include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -53,8 +54,6 @@ using namespace mlir::dataflow;
 
 namespace {
 
-enum class LayoutKind { Lane, InstData };
-
 //===----------------------------------------------------------------------===//
 // LayoutInfo
 //===----------------------------------------------------------------------===//
@@ -336,7 +335,7 @@ getSIMTLayoutInfoForDPASOperand(VectorType vectorTy, unsigned operandNum,
 class LayoutInfoPropagation
     : public SparseBackwardDataFlowAnalysis<LayoutInfoLattice> {
 private:
-  LayoutKind layoutKind;
+  xegpu::LayoutKind layoutKind;
   void visitDpasOp(xegpu::DpasOp dpas, ArrayRef<LayoutInfoLattice *> operands,
                    ArrayRef<const LayoutInfoLattice *> results);
 
@@ -392,7 +391,7 @@ class LayoutInfoPropagation
 public:
   LayoutInfoPropagation(DataFlowSolver &solver,
                         SymbolTableCollection &symbolTable,
-                        LayoutKind layoutKind)
+                        xegpu::LayoutKind layoutKind)
       : SparseBackwardDataFlowAnalysis(solver, symbolTable),
         layoutKind(layoutKind) {}
   using SparseBackwardDataFlowAnalysis::SparseBackwardDataFlowAnalysis;
@@ -482,9 +481,9 @@ bool LayoutInfoPropagation::hasParamsOfLayoutKind(
   if (anchorLayout == nullptr) {
     return false;
   }
-  if (layoutKind == LayoutKind::InstData) {
+  if (layoutKind == xegpu::LayoutKind::InstData) {
     return !(anchorLayout.getEffectiveInstDataAsInt().empty());
-  } else if (layoutKind == LayoutKind::Lane) {
+  } else if (layoutKind == xegpu::LayoutKind::Lane) {
     return !(anchorLayout.getEffectiveLaneLayoutAsInt().empty() ||
              anchorLayout.getEffectiveLaneDataAsInt().empty());
   }
@@ -532,7 +531,7 @@ void LayoutInfoPropagation::visitPrefetchNdOp(
       instData = {instHeight, instWidth};
     }
 
-    if (layoutKind == LayoutKind::InstData)
+    if (layoutKind == xegpu::LayoutKind::InstData)
       prefetchLayout =
           LayoutInfo(xegpu::LayoutAttr::get(tdescTy.getContext(), instData));
     else
@@ -705,7 +704,7 @@ void LayoutInfoPropagation::visitDpasOp(
     SmallVector<int> instDataA = {maxALen, subgroupSize};
     SmallVector<int> instDataB = {subgroupSize, maxBLen};
 
-    if (layoutKind == LayoutKind::InstData) {
+    if (layoutKind == xegpu::LayoutKind::InstData) {
       dpasALayout =
           LayoutInfo(xegpu::LayoutAttr::get(dpas.getContext(), instDataA));
       dpasBLayout =
@@ -719,7 +718,7 @@ void LayoutInfoPropagation::visitDpasOp(
 
     if (operands.size() > 2) {
       VectorType cTy = dpas.getAccType();
-      if (layoutKind == LayoutKind::InstData) {
+      if (layoutKind == xegpu::LayoutKind::InstData) {
         const unsigned dataCLen = bTy.getShape().back();
         auto supportedCLen =
             uArchInstruction->getSupportedN(bTy.getElementType());
@@ -789,7 +788,7 @@ void LayoutInfoPropagation::visitStoreNdOp(
       instData = {instHeight, instWidth};
     }
 
-    if (layoutKind == LayoutKind::InstData)
+    if (layoutKind == xegpu::LayoutKind::InstData)
       storeLayout =
           LayoutInfo(xegpu::LayoutAttr::get(dataTy.getContext(), instData));
     else
@@ -949,7 +948,7 @@ void LayoutInfoPropagation::visitLoadGatherOp(
         instData.push_back(chunkSize);
     }
 
-    if (layoutKind == LayoutKind::InstData)
+    if (layoutKind == xegpu::LayoutKind::InstData)
       loadLayout =
           LayoutInfo(xegpu::LayoutAttr::get(load.getContext(), instData));
     else
@@ -1012,7 +1011,7 @@ void LayoutInfoPropagation::visitStoreScatterOp(
     auto uArch = getUArch(getChipStr(storeScatter).value_or(""));
     const int subgroupSize = uArch->getSubgroupSize();
 
-    if (layoutKind == LayoutKind::InstData) {
+    if (layoutKind == xegpu::LayoutKind::InstData) {
       SmallVector<int> instData{subgroupSize};
       if (auto chunkSize = storeScatter.getChunkSize().value_or(0);
           chunkSize > 1)
@@ -1063,7 +1062,8 @@ class RunLayoutInfoPropagation {
 public:
   MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(RunLayoutInfoPropagation)
 
-  RunLayoutInfoPropagation(Operation *op, LayoutKind layoutKind) : target(op) {
+  RunLayoutInfoPropagation(Operation *op, xegpu::LayoutKind layoutKind)
+      : target(op) {
     SymbolTableCollection symbolTable;
     loadBaselineAnalyses(solver);
     solver.load<LayoutInfoPropagation>(symbolTable, layoutKind);
@@ -1305,24 +1305,14 @@ struct XeGPUPropagateLayoutPass final
 
 } // namespace
 
-void XeGPUPropagateLayoutPass::runOnOperation() {
-  LayoutKind layoutKind;
-  if (this->layoutKind == "lane") {
-    layoutKind = LayoutKind::Lane;
-  } else if (this->layoutKind == "inst") {
-    layoutKind = LayoutKind::InstData;
-  } else {
-    getOperation()->emitError("Unsupported layout kind option: " +
-                              this->layoutKind);
-    signalPassFailure();
-    return;
-  }
-  RunLayoutInfoPropagation analysis(getOperation(), layoutKind);
+LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
+                                      LayoutKind layoutKind, bool printOnly) {
+  RunLayoutInfoPropagation analysis(target, layoutKind);
   // Print the analysis result and exit. (for debugging purposes)
   if (printOnly) {
     auto &os = llvm::outs();
     analysis.printAnalysisResult(os);
-    return;
+    return success();
   }
   // Helper to convert LayoutInfo to xegpu::LayoutAttr.
   auto getXeGPULayoutForValue = [&](Value val) -> xegpu::DistributeLayoutAttr {
@@ -1336,8 +1326,7 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
     return cast<xegpu::LayoutAttr>(layoutAttr);
   };
 
-  mlir::OpBuilder builder(&getContext());
-  Operation *op = getOperation();
+  Operation *op = target;
   auto walkResult = op->walk([&](mlir::Block *block) -> WalkResult {
     for (mlir::Operation &op : llvm::reverse(block->getOperations())) {
       LogicalResult r = success();
@@ -1362,7 +1351,32 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
     }
     return WalkResult::advance();
   });
-  if (walkResult.wasInterrupted()) {
+  if (walkResult.wasInterrupted())
+    return failure();
+
+  return success();
+}
+
+LogicalResult xegpu::resolveLayoutConflicts(OpBuilder &builder,
+                                            Operation *target) {
+  return success();
+}
+
+void XeGPUPropagateLayoutPass::runOnOperation() {
+  xegpu::LayoutKind layoutKind;
+  if (this->layoutKind == "lane") {
+    layoutKind = xegpu::LayoutKind::Lane;
+  } else if (this->layoutKind == "inst") {
+    layoutKind = xegpu::LayoutKind::InstData;
+  } else {
+    getOperation()->emitError("Unsupported layout kind option: " +
+                              this->layoutKind);
+    signalPassFailure();
+    return;
+  }
+  OpBuilder builder(&getContext());
+  if (failed(xegpu::propagateLayouts(builder, getOperation(), layoutKind,
+                                     this->printOnly))) {
     signalPassFailure();
     return;
   }
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index 93d51441f5b81..610564910daed 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -278,6 +278,35 @@ struct TestXeGPUMoveFuncBodyToWarpOp
   }
 };
 
+struct TestXeGPUPropagateLayouts
+    : public PassWrapper<TestXeGPUPropagateLayouts,
+                         OperationPass<gpu::GPUModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestXeGPUPropagateLayouts)
+
+  StringRef getArgument() const final { return "test-xegpu-propagate-layouts"; }
+
+  StringRef getDescription() const final {
+    return "Test the implementation of XeGPU propagate layouts.";
+  }
+
+  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
+    registry.insert<xegpu::XeGPUDialect>();
+    registry.insert<gpu::GPUDialect>();
+  }
+
+  TestXeGPUPropagateLayouts() = default;
+  TestXeGPUPropagateLayouts(const TestXeGPUPropagateLayouts &pass) = default;
+
+  void runOnOperation() override {
+    OpBuilder builder(getOperation());
+    if (xegpu::propagateLayouts(OpBuilder(getOperation()), getOperation(),
+                                LayoutKind::Lane)
+            .failed()) {
+      signalPassFailure();
+    }
+  }
+};
+
 struct TestXeGPULayoutInterface
     : public PassWrapper<TestXeGPULayoutInterface,
                          OperationPass<gpu::GPUModuleOp>> {

>From c61bfe6038766fc8152ae5a72620737fd9bc3bb9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Thu, 18 Dec 2025 23:49:22 +0000
Subject: [PATCH 2/8] save work

---
 .../Dialect/XeGPU/Transforms/Transforms.h     |  2 +-
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 55 ++++++++++++++++++-
 .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 24 ++++++--
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
index b97a7e4aa3211..e3ce853dc2859 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
@@ -93,7 +93,7 @@ void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns);
 void populateXeGPUUnrollPatterns(RewritePatternSet &patterns,
                                  const UnrollOptions &options);
 
-enum class LayoutKind { Lane, InstData };
+enum class LayoutKind { Lane, InstData, Subgroup };
 LogicalResult propagateLayouts(OpBuilder &builder, Operation *target,
                                LayoutKind layoutKind, bool printOnly = false);
 
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index a122e63247416..463b94ae649af 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -530,7 +530,7 @@ bool LayoutInfoPropagation::hasParamsOfLayoutKind(
   } else if (layoutKind == xegpu::LayoutKind::Lane) {
     return !(anchorLayout.getEffectiveLaneLayoutAsInt().empty() ||
              anchorLayout.getEffectiveLaneDataAsInt().empty());
-  } else if (layoutKind == LayoutKind::Subgroup) {
+  } else if (layoutKind == xegpu::LayoutKind::Subgroup) {
     return !(anchorLayout.getEffectiveSgLayoutAsInt().empty() ||
              anchorLayout.getEffectiveSgDataAsInt().empty());
   }
@@ -1402,6 +1402,57 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
 
 LogicalResult xegpu::resolveLayoutConflicts(OpBuilder &builder,
                                             Operation *target) {
+  auto r = target->walk([&](xegpu::LoadNdOp loadNdOp) -> WalkResult {
+    // Load op has a conflict if tensor desc layout is different from the its
+    // result layout.
+    auto getResultLayout = [](OpResult result) {
+      auto resultLayoutName = xegpu::getTemporaryLayoutName(result);
+      return result.getOwner()->getAttrOfType<xegpu::DistributeLayoutAttr>(
+          resultLayoutName);
+    };
+    auto hasConflict = [&getResultLayout](xegpu::LoadNdOp loadNdOp) -> bool {
+      auto tdescType = loadNdOp.getTensorDescType();
+      auto tdescLayout = tdescType.getLayout();
+      auto resultLayoutName =
+          xegpu::getTemporaryLayoutName(loadNdOp->getOpResult(0));
+      auto resultLayout = getResultLayout(loadNdOp->getOpResult(0));
+      return tdescLayout && resultLayout && tdescLayout != resultLayout;
+    };
+    if (hasConflict(loadNdOp)) {
+      OpBuilder builder(loadNdOp);
+      // Try to get the defining createNdDesc op.
+      auto createNdOp =
+          loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
+      if (!createNdOp) {
+        DBGS() << "Failed to resolve LoadNdOp layout conflict: " << *loadNdOp
+               << "\n";
+        return WalkResult::interrupt();
+      }
+
+      builder.setInsertionPointAfter(createNdOp);
+      auto tdescType = loadNdOp.getTensorDescType();
+      auto expectedLayout = getResultLayout(loadNdOp->getOpResult(0));
+      auto newTensorDescType = xegpu::TensorDescType::get(
+          createNdOp.getContext(), tdescType.getShape(),
+          tdescType.getElementType(), tdescType.getEncoding(), expectedLayout);
+      auto newOp = xegpu::CreateNdDescOp::create(
+          builder, loadNdOp.getLoc(), newTensorDescType,
+          createNdOp->getOperands(), createNdOp->getAttrs());
+      // Replace only the conflicting uses of the createNdOp that can be
+      // resolved using the new layout.
+      createNdOp->replaceUsesWithIf(
+          ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
+            auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
+            if (!userLoadNdOp)
+              return false;
+            auto resultLayout = getResultLayout(userLoadNdOp->getOpResult(0));
+            return hasConflict(userLoadNdOp) && resultLayout == expectedLayout;
+          });
+    }
+    return WalkResult::advance();
+  });
+  if (r.wasInterrupted())
+    return failure();
   return success();
 }
 
@@ -1412,7 +1463,7 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
   } else if (this->layoutKind == "inst") {
     layoutKind = xegpu::LayoutKind::InstData;
   } else if (this->layoutKind == "subgroup") {
-    layoutKind = LayoutKind::Subgroup;
+    layoutKind = xegpu::LayoutKind::Subgroup;
   } else {
     getOperation()->emitError("Unsupported layout kind option: " +
                               this->layoutKind);
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index cd9e8a6daa42a..cfe8f32fdaf70 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -294,13 +294,29 @@ struct TestXeGPUPropagateLayouts
   }
 
   TestXeGPUPropagateLayouts() = default;
-  TestXeGPUPropagateLayouts(const TestXeGPUPropagateLayouts &pass) = default;
+  TestXeGPUPropagateLayouts(const TestXeGPUPropagateLayouts &pass)
+      : PassWrapper(pass) {}
+
+  Option<std::string> layoutKind{
+      *this, "layout-kind",
+      llvm::cl::desc("Propagate `subgroup` / `inst` / `lane` level of xegpu "
+                     "layouts."),
+      llvm::cl::init("lane")};
 
   void runOnOperation() override {
     OpBuilder builder(getOperation());
-    if (xegpu::propagateLayouts(OpBuilder(getOperation()), getOperation(),
-                                LayoutKind::Lane)
-            .failed()) {
+    LayoutKind kind;
+    if (layoutKind == "subgroup")
+      kind = LayoutKind::Subgroup;
+    else if (layoutKind == "inst")
+      kind = LayoutKind::InstData;
+    else if (layoutKind == "lane")
+      kind = LayoutKind::Lane;
+    else {
+      signalPassFailure();
+      return;
+    }
+    if (xegpu::propagateLayouts(builder, getOperation(), kind).failed()) {
       signalPassFailure();
     }
   }

>From b76ad1dc5f5a179e4112bc3061e45fc1de4cb4e7 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 19 Dec 2025 19:28:16 +0000
Subject: [PATCH 3/8] save work

---
 .../Dialect/XeGPU/Transforms/Transforms.h     |   2 +-
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 192 +++++++++++++-----
 .../XeGPU/resolve-layout-conflicts.mlir       |  23 +++
 .../lib/Dialect/XeGPU/TestXeGPUTransforms.cpp |  33 ++-
 4 files changed, 194 insertions(+), 56 deletions(-)
 create mode 100644 mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir

diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
index e3ce853dc2859..80ea1e3407058 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
@@ -97,7 +97,7 @@ enum class LayoutKind { Lane, InstData, Subgroup };
 LogicalResult propagateLayouts(OpBuilder &builder, Operation *target,
                                LayoutKind layoutKind, bool printOnly = false);
 
-LogicalResult resolveLayoutConflicts(OpBuilder &builder, Operation *target);
+LogicalResult resolveLayoutConflicts(Operation *target);
 
 } // namespace xegpu
 } // namespace mlir
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 463b94ae649af..c8138a4d16016 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Support/raw_ostream.h"
 
 #include "mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h"
+#include "mlir/Support/WalkResult.h"
 
 namespace mlir {
 namespace xegpu {
@@ -1180,6 +1181,77 @@ void RunLayoutInfoPropagation::printAnalysisResult(llvm::raw_ostream &os) {
     printFunctionResult(funcOp);
 }
 
+namespace {
+
+//===----------------------------------------------------------------------===//
+// ResolveLayoutConflicts
+//===----------------------------------------------------------------------===//
+/// Walks `parentOp` and repairs ops whose layouts disagree (LoadNd for now).
+struct ResolveLayoutConflicts {
+  explicit ResolveLayoutConflicts(Operation *parentOp)
+      : parentOp(parentOp), builder(parentOp->getContext()) {}
+  LogicalResult run(); // failure() if the walk was interrupted.
+private:
+  Operation *parentOp; // Root operation of the walk.
+  OpBuilder builder;   // Used to insert the duplicated CreateNdDescOp.
+  LogicalResult resolveLoadNdOp(xegpu::LoadNdOp loadNdOp);
+};
+
+} // namespace
+
+/// Walk `parentOp` and resolve conflicts op by op; fail on the first error.
+LogicalResult ResolveLayoutConflicts::run() {
+  auto r = parentOp->walk([&](Operation *op) -> WalkResult {
+    // A void TypeSwitch would discard the case's result; check it directly.
+    auto loadNdOp = dyn_cast<xegpu::LoadNdOp>(op);
+    if (loadNdOp && failed(resolveLoadNdOp(loadNdOp)))
+      return WalkResult::interrupt();
+    // TODO: Add other layout conflict resolution methods as needed.
+    return WalkResult::advance();
+  });
+  return r.wasInterrupted() ? failure() : success();
+}
+
+/// Resolve a LoadNd whose anchor layout differs from the layout of its tensor
+/// descriptor, by cloning the CreateNdDescOp with the anchor layout.
+LogicalResult
+ResolveLayoutConflicts::resolveLoadNdOp(xegpu::LoadNdOp loadNdOp) {
+  Attribute anchorLayout = loadNdOp.getLayoutAttr();
+  Attribute tdescLayout = loadNdOp.getTensorDescType().getLayout();
+
+  if (anchorLayout && tdescLayout && anchorLayout != tdescLayout) {
+    // Only a descriptor defined directly by a CreateNdDescOp can be fixed.
+    auto conflictingCreateNdOp =
+        loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
+    if (!conflictingCreateNdOp) {
+      DBGS() << "Unable to find defining CreateNdDescOp for tensor descriptor: "
+             << loadNdOp.getTensorDesc() << "\n";
+      return failure();
+    }
+    // Clone the CreateNdDescOp right after it, using the anchor layout.
+    builder.setInsertionPointAfter(conflictingCreateNdOp);
+    xegpu::TensorDescType tdescType = loadNdOp.getTensorDescType();
+    auto expectedLayout = anchorLayout;
+    auto newTensorDescType = xegpu::TensorDescType::get(
+        conflictingCreateNdOp.getContext(), tdescType.getShape(),
+        tdescType.getElementType(), tdescType.getEncoding(), expectedLayout);
+    xegpu::CreateNdDescOp newOp = xegpu::CreateNdDescOp::create(
+        builder, loadNdOp.getLoc(), newTensorDescType,
+        conflictingCreateNdOp->getOperands(),
+        conflictingCreateNdOp->getAttrs());
+    // Redirect only LoadNd users whose anchor layout equals the new layout;
+    // every other user keeps the original descriptor.
+    conflictingCreateNdOp->replaceUsesWithIf(
+        ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
+          auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
+          if (!userLoadNdOp)
+            return false;
+          return userLoadNdOp.getLayoutAttr() == expectedLayout;
+        });
+  }
+  return success();
+}
+
 using GetLayoutFnTy = function_ref<xegpu::DistributeLayoutAttr(Value)>;
 /// Update an operation with the layout of its results. If the result type is
 /// a vector type, a temporary layout attribute is added to the operation. If
@@ -1400,60 +1472,67 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
   return success();
 }
 
-LogicalResult xegpu::resolveLayoutConflicts(OpBuilder &builder,
-                                            Operation *target) {
-  auto r = target->walk([&](xegpu::LoadNdOp loadNdOp) -> WalkResult {
-    // Load op has a conflict if tensor desc layout is different from the its
-    // result layout.
-    auto getResultLayout = [](OpResult result) {
-      auto resultLayoutName = xegpu::getTemporaryLayoutName(result);
-      return result.getOwner()->getAttrOfType<xegpu::DistributeLayoutAttr>(
-          resultLayoutName);
-    };
-    auto hasConflict = [&getResultLayout](xegpu::LoadNdOp loadNdOp) -> bool {
-      auto tdescType = loadNdOp.getTensorDescType();
-      auto tdescLayout = tdescType.getLayout();
-      auto resultLayoutName =
-          xegpu::getTemporaryLayoutName(loadNdOp->getOpResult(0));
-      auto resultLayout = getResultLayout(loadNdOp->getOpResult(0));
-      return tdescLayout && resultLayout && tdescLayout != resultLayout;
-    };
-    if (hasConflict(loadNdOp)) {
-      OpBuilder builder(loadNdOp);
-      // Try to get the defining createNdDesc op.
-      auto createNdOp =
-          loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
-      if (!createNdOp) {
-        DBGS() << "Failed to resolve LoadNdOp layout conflict: " << *loadNdOp
-               << "\n";
-        return WalkResult::interrupt();
-      }
-
-      builder.setInsertionPointAfter(createNdOp);
-      auto tdescType = loadNdOp.getTensorDescType();
-      auto expectedLayout = getResultLayout(loadNdOp->getOpResult(0));
-      auto newTensorDescType = xegpu::TensorDescType::get(
-          createNdOp.getContext(), tdescType.getShape(),
-          tdescType.getElementType(), tdescType.getEncoding(), expectedLayout);
-      auto newOp = xegpu::CreateNdDescOp::create(
-          builder, loadNdOp.getLoc(), newTensorDescType,
-          createNdOp->getOperands(), createNdOp->getAttrs());
-      // Replace only the conflicting uses of the createNdOp that can be
-      // resolved using the new layout.
-      createNdOp->replaceUsesWithIf(
-          ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
-            auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
-            if (!userLoadNdOp)
-              return false;
-            auto resultLayout = getResultLayout(userLoadNdOp->getOpResult(0));
-            return hasConflict(userLoadNdOp) && resultLayout == expectedLayout;
-          });
-    }
-    return WalkResult::advance();
-  });
-  if (r.wasInterrupted())
-    return failure();
-  return success();
+// LogicalResult xegpu::resolveLayoutConflicts(OpBuilder &builder,
+//                                             Operation *target) {
+//   auto r = target->walk([&](xegpu::LoadNdOp loadNdOp) -> WalkResult {
+//     // Load op has a conflict if tensor desc layout is different from its
+//     // result layout.
+//     auto getResultLayout = [](OpResult result) {
+//       auto resultLayoutName = xegpu::getTemporaryLayoutName(result);
+//       return result.getOwner()->getAttrOfType<xegpu::DistributeLayoutAttr>(
+//           resultLayoutName);
+//     };
+//     auto hasConflict = [&getResultLayout](xegpu::LoadNdOp loadNdOp) -> bool {
+//       auto tdescType = loadNdOp.getTensorDescType();
+//       auto tdescLayout = tdescType.getLayout();
+//       auto resultLayoutName =
+//           xegpu::getTemporaryLayoutName(loadNdOp->getOpResult(0));
+//       auto resultLayout = getResultLayout(loadNdOp->getOpResult(0));
+//       return tdescLayout && resultLayout && tdescLayout != resultLayout;
+//     };
+//     if (hasConflict(loadNdOp)) {
+//       OpBuilder builder(loadNdOp);
+//       // Try to get the defining createNdDesc op.
+//       auto createNdOp =
+//           loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
+//       if (!createNdOp) {
+//         DBGS() << "Failed to resolve LoadNdOp layout conflict: " << *loadNdOp
+//                << "\n";
+//         return WalkResult::interrupt();
+//       }
+
+//       builder.setInsertionPointAfter(createNdOp);
+//       auto tdescType = loadNdOp.getTensorDescType();
+//       auto expectedLayout = getResultLayout(loadNdOp->getOpResult(0));
+//       auto newTensorDescType = xegpu::TensorDescType::get(
+//           createNdOp.getContext(), tdescType.getShape(),
+//           tdescType.getElementType(), tdescType.getEncoding(),
+//           expectedLayout);
+//       auto newOp = xegpu::CreateNdDescOp::create(
+//           builder, loadNdOp.getLoc(), newTensorDescType,
+//           createNdOp->getOperands(), createNdOp->getAttrs());
+//       // Replace only the conflicting uses of the createNdOp that can be
+//       // resolved using the new layout.
+//       createNdOp->replaceUsesWithIf(
+//           ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
+//             auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
+//             if (!userLoadNdOp)
+//               return false;
+//             auto resultLayout =
+//                 getResultLayout(userLoadNdOp->getOpResult(0));
+//             return hasConflict(userLoadNdOp) && resultLayout == expectedLayout;
+//           });
+//     }
+//     return WalkResult::advance();
+//   });
+//   if (r.wasInterrupted())
+//     return failure();
+//   return success();
+// }
+
+/// Resolve layout conflicts among the ops walked from `target`.
+LogicalResult xegpu::resolveLayoutConflicts(Operation *target) {
+  return ResolveLayoutConflicts(target).run();
 }
 
 void XeGPUPropagateLayoutPass::runOnOperation() {
@@ -1476,4 +1555,9 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
     signalPassFailure();
     return;
   }
+  // Resolve layout conflicts if any.
+  if (failed(xegpu::resolveLayoutConflicts(getOperation()))) {
+    signalPassFailure();
+    return;
+  }
 }
diff --git a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
new file mode 100644
index 0000000000000..dd3f3c8bdc29e
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt --test-xegpu-resolve-layout-conflicts -split-input-file %s | FileCheck %s
+
+#load_lo = #xegpu.layout<inst_data = [8, 16]>
+#prefetch_lo = #xegpu.layout<inst_data = [16, 16]>
+gpu.module @test {
+
+// CHECK-LABEL:   func.func @load_nd_with_conflicting_tensor_desc
+// CHECK:           %{{.*}} = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [16, 16]>>
+// CHECK-NEXT:      %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [8, 16]>>
+// CHECK-NEXT:      %{{.*}} = xegpu.load_nd %[[T1]][%{{.*}}, %{{.*}}] <{layout = #xegpu.layout<inst_data = [8, 16]>}> :
+// CHECK-SAME:        !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [8, 16]>> -> vector<16x16xf16>
+func.func @load_nd_with_conflicting_tensor_desc(%arg0: memref<64x64xf16>) -> vector<16x16xf16> {
+  %c0 = arith.constant 0 : index
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<64x64xf16>
+    -> !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+  %1 = xegpu.load_nd %0 [%c0, %c0] {layout = #load_lo} : !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+    -> vector<16x16xf16>
+  xegpu.prefetch_nd %0 [%c0, %c0] {layout = #prefetch_lo} : !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+  return %1 : vector<16x16xf16>
+}
+}
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index cfe8f32fdaf70..c8a6a6d7b8eb8 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -316,7 +316,36 @@ struct TestXeGPUPropagateLayouts
       signalPassFailure();
       return;
     }
-    if (xegpu::propagateLayouts(builder, getOperation(), kind).failed()) {
+    if (failed(xegpu::propagateLayouts(builder, getOperation(), kind))) {
+      signalPassFailure();
+    }
+  }
+};
+
+struct TestXeGPUResolveLayoutConflicts
+    : public PassWrapper<TestXeGPUResolveLayoutConflicts,
+                         OperationPass<gpu::GPUModuleOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestXeGPUResolveLayoutConflicts)
+
+  StringRef getArgument() const final {
+    return "test-xegpu-resolve-layout-conflicts";
+  }
+
+  StringRef getDescription() const final {
+    return "Test the implementation of XeGPU layout conflict resolution.";
+  }
+
+  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
+    registry.insert<xegpu::XeGPUDialect>();
+    registry.insert<gpu::GPUDialect>();
+  }
+
+  TestXeGPUResolveLayoutConflicts() = default;
+  TestXeGPUResolveLayoutConflicts(const TestXeGPUResolveLayoutConflicts &pass) =
+      default;
+
+  void runOnOperation() override {
+    if (failed(xegpu::resolveLayoutConflicts(getOperation()))) {
       signalPassFailure();
     }
   }
@@ -387,6 +416,8 @@ void registerTestXeGPULowerings() {
   PassRegistration<TestXeGPULayoutInterface>();
   PassRegistration<TestXeGPUSGDistribute>();
   PassRegistration<TestXeGPUMoveFuncBodyToWarpOp>();
+  PassRegistration<TestXeGPUPropagateLayouts>();
+  PassRegistration<TestXeGPUResolveLayoutConflicts>();
 }
 } // namespace test
 } // namespace mlir

>From 5d2a2c5f4afb274c37e10bfc0fecb8afbc0a22c9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 19 Dec 2025 19:34:32 +0000
Subject: [PATCH 4/8] save work

---
 .../test/Dialect/XeGPU/propagate-layout-inst-data.mlir |  2 +-
 mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir |  2 +-
 mlir/test/Dialect/XeGPU/propagate-layout.mlir          | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir
index 5f70831f45e97..5e095fe0df89e 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=inst" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-propagate-layouts="layout-kind=inst" -split-input-file %s | FileCheck %s
 
 
 // CHECK-LABEL: func.func @load_store_no_array_len(
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index 092a4cf442782..7675c44be1c61 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=subgroup" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-propagate-layouts="layout-kind=subgroup" -split-input-file %s | FileCheck %s
 
 gpu.module @test {
   // CHECK-LABEL: store_nd
diff --git a/mlir/test/Dialect/XeGPU/propagate-layout.mlir b/mlir/test/Dialect/XeGPU/propagate-layout.mlir
index b88d8e1a78a26..3e7f3d5156d62 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -xevm-attach-target='chip=pvc' -xegpu-propagate-layout="layout-kind=lane" -split-input-file %s | FileCheck %s
+// RUN: mlir-opt -xevm-attach-target='chip=pvc' -test-xegpu-propagate-layouts="layout-kind=lane" -split-input-file %s | FileCheck %s
 
 gpu.module @test {
 // CHECK-LABEL: func.func @dpas_f16(
@@ -32,7 +32,7 @@ func.func @dpas_f16(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: me
 gpu.module @test {
 // CHECK-LABEL: func.func @dpas_i8(
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: vector<8x32xi8>, %[[ARG1:[0-9a-zA-Z]+]]: vector<32x16xi8>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xi32>) {
-// CHECK: %[[T0:.*]] = xegpu.dpas %[[ARG0]], %[[ARG1]] {layout_a = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>, layout_b = #xegpu.layout<lane_layout = [1, 16], lane_data = [4, 1]>, layout_cd = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} 
+// CHECK: %[[T0:.*]] = xegpu.dpas %[[ARG0]], %[[ARG1]] {layout_a = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>, layout_b = #xegpu.layout<lane_layout = [1, 16], lane_data = [4, 1]>, layout_cd = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
 
 func.func @dpas_i8(%arg0: vector<8x32xi8>, %arg1: vector<32x16xi8>, %arg2: memref<8x16xi32>) {
   %c0 = arith.constant 0 : index
@@ -109,7 +109,7 @@ gpu.module @test {
 // CHECK-NEXT: %[[CST0:.*]] = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<true> : vector<16xi1>
 // CHECK-NEXT: %[[T2:.*]] = xegpu.create_tdesc %[[ARG1]], %[[CST]] : memref<256xf16>, vector<16xindex> ->
 // CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.scatter_tdesc_attr<chunk_size = 16 : i64>, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>>
-// CHECK-NEXT: %{{.*}} = xegpu.load %[[T2]], %[[CST0]]  <{layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>}> 
+// CHECK-NEXT: %{{.*}} = xegpu.load %[[T2]], %[[CST0]]  <{layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>}>
 // CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.scatter_tdesc_attr<chunk_size = 16 : i64>, #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 2]>>, vector<16xi1> -> vector<16x16xf16>
 func.func @load_gather_with_chunksize(%arg0: memref<8x16xf16>, %arg1: memref<256xf16>, %arg2: memref<8x16xf32>) {
   %c0 = arith.constant 0 : index
@@ -240,7 +240,7 @@ gpu.module @test {
 // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) {
 // CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [8], lane_data = [1]>} dense<true> : vector<16xi1>
 // CHECK: %[[OFFSETS:.*]] = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [8], lane_data = [1]>} dense<12> : vector<16xindex>
-// CHECK: %[[LOAD_VEC:.*]] = xegpu.load %[[ARG0]][%[[OFFSETS]]], %[[MASK]] 
+// CHECK: %[[LOAD_VEC:.*]] = xegpu.load %[[ARG0]][%[[OFFSETS]]], %[[MASK]]
 // CHECK-SAME: memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16>
 // CHECK: %[[ADD_RES:.*]] = arith.addf %[[LOAD_VEC]], %[[LOAD_VEC]] {layout_result_0 = #xegpu.layout<lane_layout = [8], lane_data = [1]>} : vector<16xf16>
 // CHECK: xegpu.store %[[ADD_RES]], %[[ARG0]][%[[OFFSETS]]], %[[MASK]]
@@ -697,4 +697,4 @@ func.func @vector_broadcast_scalar_to_vector(%arg0: !xegpu.tensor_desc<16x16xf16
   xegpu.store_nd %6, %arg0  : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16>
   return
 }
-}
\ No newline at end of file
+}

>From 23bb87cbdd69610d83f54449fdd980583f2ee527 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 19 Dec 2025 19:51:21 +0000
Subject: [PATCH 5/8] save work

---
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 60 +------------------
 1 file changed, 1 insertion(+), 59 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index c8138a4d16016..9c35b14be0bd5 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -1197,7 +1197,7 @@ struct ResolveLayoutConflicts {
   LogicalResult resolveLoadNdOp(xegpu::LoadNdOp loadNdOp);
 };
 
-}; // namespace
+} // namespace
 
 LogicalResult ResolveLayoutConflicts::run() {
   auto r = parentOp->walk([&](Operation *op) -> WalkResult {
@@ -1472,64 +1472,6 @@ LogicalResult xegpu::propagateLayouts(OpBuilder &builder, Operation *target,
   return success();
 }
 
-// LogicalResult xegpu::resolveLayoutConflicts(OpBuilder &builder,
-//                                             Operation *target) {
-//   auto r = target->walk([&](xegpu::LoadNdOp loadNdOp) -> WalkResult {
-//     // Load op has a conflict if tensor desc layout is different from the its
-//     // result layout.
-//     auto getResultLayout = [](OpResult result) {
-//       auto resultLayoutName = xegpu::getTemporaryLayoutName(result);
-//       return result.getOwner()->getAttrOfType<xegpu::DistributeLayoutAttr>(
-//           resultLayoutName);
-//     };
-//     auto hasConflict = [&getResultLayout](xegpu::LoadNdOp loadNdOp) -> bool {
-//       auto tdescType = loadNdOp.getTensorDescType();
-//       auto tdescLayout = tdescType.getLayout();
-//       auto resultLayoutName =
-//           xegpu::getTemporaryLayoutName(loadNdOp->getOpResult(0));
-//       auto resultLayout = getResultLayout(loadNdOp->getOpResult(0));
-//       return tdescLayout && resultLayout && tdescLayout != resultLayout;
-//     };
-//     if (hasConflict(loadNdOp)) {
-//       OpBuilder builder(loadNdOp);
-//       // Try to get the defining createNdDesc op.
-//       auto createNdOp =
-//           loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
-//       if (!createNdOp) {
-//         DBGS() << "Failed to resolve LoadNdOp layout conflict: " << *loadNdOp
-//                << "\n";
-//         return WalkResult::interrupt();
-//       }
-
-//       builder.setInsertionPointAfter(createNdOp);
-//       auto tdescType = loadNdOp.getTensorDescType();
-//       auto expectedLayout = getResultLayout(loadNdOp->getOpResult(0));
-//       auto newTensorDescType = xegpu::TensorDescType::get(
-//           createNdOp.getContext(), tdescType.getShape(),
-//           tdescType.getElementType(), tdescType.getEncoding(),
-//           expectedLayout);
-//       auto newOp = xegpu::CreateNdDescOp::create(
-//           builder, loadNdOp.getLoc(), newTensorDescType,
-//           createNdOp->getOperands(), createNdOp->getAttrs());
-//       // Replace only the conflicting uses of the createNdOp that can be
-//       // resolved using the new layout.
-//       createNdOp->replaceUsesWithIf(
-//           ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
-//             auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
-//             if (!userLoadNdOp)
-//               return false;
-//             auto resultLayout =
-//             getResultLayout(userLoadNdOp->getOpResult(0)); return
-//             hasConflict(userLoadNdOp) && resultLayout == expectedLayout;
-//           });
-//     }
-//     return WalkResult::advance();
-//   });
-//   if (r.wasInterrupted())
-//     return failure();
-//   return success();
-// }
-
 LogicalResult xegpu::resolveLayoutConflicts(Operation *target) {
   ResolveLayoutConflicts resolver(target);
   return resolver.run();

>From c1e98457fe74fd845b38941d60920cda343c8a56 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Mon, 26 Jan 2026 22:47:19 +0000
Subject: [PATCH 6/8] address comments

---
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 114 ++++++++++++------
 .../XeGPU/resolve-layout-conflicts.mlir       |  58 +++++++++
 2 files changed, 132 insertions(+), 40 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 0e890a002ad04..878dc5486f24c 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -26,6 +26,7 @@
 #include "mlir/IR/Visitors.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
@@ -810,7 +811,7 @@ void LayoutInfoPropagation::visitDpasOp(
       }
       instDataCD = {maxALen, maxCLen};
     }
-    if (layoutKind == LayoutKind::InstData) {
+    if (layoutKind == xegpu::LayoutKind::InstData) {
       dpasALayout =
           LayoutInfo(xegpu::LayoutAttr::get(dpas.getContext(), instDataA));
       dpasBLayout =
@@ -819,7 +820,7 @@ void LayoutInfoPropagation::visitDpasOp(
         dpasCDLayout =
             LayoutInfo(xegpu::LayoutAttr::get(dpas.getContext(), instDataCD));
       }
-    } else if (layoutKind == LayoutKind::Lane) {
+    } else if (layoutKind == xegpu::LayoutKind::Lane) {
       dpasALayout = getSIMTLayoutInfoForDPASOperand(
           aTy, 0, uArch, uArchInstruction->getPackedFormatBitSizeA());
       dpasBLayout = getSIMTLayoutInfoForDPASOperand(
@@ -981,7 +982,7 @@ void LayoutInfoPropagation::visitStoreNdOp(
     if (layoutKind == xegpu::LayoutKind::InstData)
       storeLayout =
           LayoutInfo(xegpu::LayoutAttr::get(dataTy.getContext(), instData));
-    else if (layoutKind == LayoutKind::Lane)
+    else if (layoutKind == xegpu::LayoutKind::Lane)
       storeLayout =
           getSIMTLayoutInfoBlockIO(store.getValueType(), uArch,
                                    uArchInstruction->getPackedFormatBitSize());
@@ -1171,7 +1172,7 @@ void LayoutInfoPropagation::visitLoadGatherOp(
             uArch->getInstruction(xegpu::uArch::InstructionKind::LoadGather));
 
     // Check if value inst_data complies with uArch
-    if (layoutKind == LayoutKind::InstData) {
+    if (layoutKind == xegpu::LayoutKind::InstData) {
       // Each lane loads either one element
       SmallVector<int> instDataUarch{subgroupSize};
       // Or multiple elements as 2D with lane's elements in the inner dimension
@@ -1213,10 +1214,10 @@ void LayoutInfoPropagation::visitLoadGatherOp(
   // Rank >1 data: Enforce the default xegpu 1D layout for mask.
   if (!hasParamsOfLayoutKind(anchorLayout) ||
       load.getValueType().getRank() > 1) {
-    if (layoutKind == LayoutKind::InstData)
+    if (layoutKind == xegpu::LayoutKind::InstData)
       maskLayout = LayoutInfo(
           xegpu::LayoutAttr::get(load->getContext(), {subgroupSize}));
-    else if (layoutKind == LayoutKind::Lane)
+    else if (layoutKind == xegpu::LayoutKind::Lane)
       maskLayout =
           getDefaultSIMTLayoutInfo(load->getContext(), 1, subgroupSize);
   }
@@ -1271,7 +1272,7 @@ void LayoutInfoPropagation::visitStoreScatterOp(
       return;
     }
 
-    if (layoutKind == LayoutKind::InstData) {
+    if (layoutKind == xegpu::LayoutKind::InstData) {
       const auto *uArchInstruction =
           dyn_cast<xegpu::uArch::StoreScatterInstruction>(uArch->getInstruction(
               xegpu::uArch::InstructionKind::StoreScatter));
@@ -1308,10 +1309,10 @@ void LayoutInfoPropagation::visitStoreScatterOp(
   // Rank >1 data: Enforce the default xegpu 1D layout for mask.
   if (!hasParamsOfLayoutKind(anchorLayout) ||
       storeScatter.getValueType().getRank() > 1) {
-    if (layoutKind == LayoutKind::InstData)
+    if (layoutKind == xegpu::LayoutKind::InstData)
       maskLayout = LayoutInfo(
           xegpu::LayoutAttr::get(storeScatter->getContext(), {subgroupSize}));
-    else if (layoutKind == LayoutKind::Lane)
+    else if (layoutKind == xegpu::LayoutKind::Lane)
       maskLayout =
           getDefaultSIMTLayoutInfo(storeScatter->getContext(), 1, subgroupSize);
   }
@@ -1345,7 +1346,7 @@ void LayoutInfoPropagation::visitStoreMatrixOp(
     assert(payloadTy.getRank() == 2 && "Expecting 2D vector for store matrix.");
     auto uArch = getUArch(getChipStr(storeMatrix).value_or(""));
     SmallVector<int> instData = {1, uArch->getSubgroupSize()};
-    if (layoutKind == LayoutKind::InstData)
+    if (layoutKind == xegpu::LayoutKind::InstData)
       layout = LayoutInfo(
           xegpu::LayoutAttr::get(storeMatrix.getContext(), instData));
     else
@@ -1453,60 +1454,93 @@ struct ResolveLayoutConflicts {
 private:
   Operation *parentOp;
   OpBuilder builder;
-  LogicalResult resolveLoadNdOp(xegpu::LoadNdOp loadNdOp);
+  LogicalResult resolveTensorDescConsumer(OpOperand &operand);
+  LogicalResult resolveVectorConsumer(OpOperand &operand);
 };
 
 } // namespace
 
 LogicalResult ResolveLayoutConflicts::run() {
+  // Helper function to get the tensor descriptor operand, or null if none found
+  // TODO: We assume only one tensor descriptor operand per op.
+  auto getTensorDescOperand = [](Operation *op) -> OpOperand * {
+    for (OpOperand &opnd : op->getOpOperands()) {
+      if (isa<xegpu::TensorDescType>(opnd.get().getType())) {
+        return &opnd;
+      }
+    }
+    return nullptr;
+  };
+
   auto r = parentOp->walk([&](Operation *op) -> WalkResult {
-    TypeSwitch<Operation *>(op).Case([&](xegpu::LoadNdOp loadNdOp) {
-      return failed(resolveLoadNdOp(loadNdOp)) ? WalkResult::interrupt()
-                                               : WalkResult::advance();
-    });
-    // TODO: Add other layout conflict resolution methods as needed.
+    // Check if this op is an anchor op and at least one operand is a tensor
+    // descriptor type.
+    OpOperand *tdescOperand = getTensorDescOperand(op);
+    if (isa<xegpu::AnchorLayoutInterface>(op) && tdescOperand) {
+      auto res = resolveTensorDescConsumer(*tdescOperand);
+      return succeeded(res) ? WalkResult::advance() : WalkResult::interrupt();
+    }
     return WalkResult::advance();
   });
 
   return r.wasInterrupted() ? failure() : success();
 }
 
-/// LoadNd has a conflict if the tensor descriptor layout is different from the
-/// load's anchor layout.
-LogicalResult
-ResolveLayoutConflicts::resolveLoadNdOp(xegpu::LoadNdOp loadNdOp) {
-  Attribute anchorLayout = loadNdOp.getLayoutAttr();
-  Attribute tdescLayout = loadNdOp.getTensorDescType().getLayout();
+/// Helper to get the defining CreateNdDescOp of a tensor descriptor value. This
+/// function tries to find the defining CreateNdDescOp recursively across
+/// loop boundaries by following the tied init of loop block arguments.
+static xegpu::CreateNdDescOp getDefiningCreateNdDescOp(Value tdescValue) {
+  // Try to get the defining CreateNdDescOp of the tensor descriptor.
+  auto definingOp = tdescValue.getDefiningOp<xegpu::CreateNdDescOp>();
+  if (definingOp)
+    return definingOp;
+  // If tdescValue is a block argument, follow the tied init value of the
+  // parent loop-like op (if any) and search again from there.
+  if (auto arg = dyn_cast<BlockArgument>(tdescValue)) {
+    auto *parentOp = arg.getOwner()->getParentOp();
+    if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
+      OpOperand *tiedInit = loop.getTiedLoopInit(arg);
+      if (tiedInit)
+        return getDefiningCreateNdDescOp(tiedInit->get());
+    }
+  }
+  // If not found, return null.
+  return nullptr;
+}
 
-  if (anchorLayout && tdescLayout && anchorLayout != tdescLayout) {
+LogicalResult
+ResolveLayoutConflicts::resolveTensorDescConsumer(OpOperand &operand) {
+  Operation *consumerOp = operand.getOwner();
+  Value tdescValue = operand.get();
+  auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(consumerOp);
+  auto currTDescType = dyn_cast<xegpu::TensorDescType>(tdescValue.getType());
+  assert(anchorOp && currTDescType &&
+         "Expected anchor layout op and tensor descriptor consumer.");
+  Attribute currLayout = currTDescType.getLayout();
+  Attribute expectedLayout = anchorOp.getAnchorLayout();
+  // A conflict exists in tensot descriptor operand if tensor descriptor's
+  // layout is different from the anchor layout expected by the consumer.
+  if (expectedLayout && currLayout && expectedLayout != currLayout) {
     // Try to get the defining CreateNdDescOp of the tensor descriptor.
-    auto conflictingCreateNdOp =
-        loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
+    auto conflictingCreateNdOp = getDefiningCreateNdDescOp(tdescValue);
     if (!conflictingCreateNdOp) {
       DBGS() << "Unable to find defining CreateNdDescOp for tensor descriptor: "
-             << loadNdOp.getTensorDesc() << "\n";
+             << tdescValue << "\n";
       return failure();
     }
     // Duplicate the CreateNdDescOp with the expected layout.
     builder.setInsertionPointAfter(conflictingCreateNdOp);
-    xegpu::TensorDescType tdescType = loadNdOp.getTensorDescType();
-    auto expectedLayout = anchorLayout;
     auto newTensorDescType = xegpu::TensorDescType::get(
-        conflictingCreateNdOp.getContext(), tdescType.getShape(),
-        tdescType.getElementType(), tdescType.getEncoding(), expectedLayout);
+        conflictingCreateNdOp.getContext(), currTDescType.getShape(),
+        currTDescType.getElementType(), currTDescType.getEncoding(),
+        expectedLayout);
     xegpu::CreateNdDescOp newOp = xegpu::CreateNdDescOp::create(
-        builder, loadNdOp.getLoc(), newTensorDescType,
+        builder, consumerOp->getLoc(), newTensorDescType,
         conflictingCreateNdOp->getOperands(),
         conflictingCreateNdOp->getAttrs());
-    // Replace only the conflicting uses of the createNdOp that can be
-    // resolved using the new layout.
-    conflictingCreateNdOp->replaceUsesWithIf(
-        ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
-          auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
-          if (!userLoadNdOp)
-            return false;
-          return userLoadNdOp.getLayoutAttr() == expectedLayout;
-        });
+    // Replace the tensor descriptor operand in the consumer op with the new
+    // tensor descriptor.
+    consumerOp->replaceUsesOfWith(tdescValue, newOp.getResult());
   }
   return success();
 }
diff --git a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
index dd3f3c8bdc29e..d1dbe8bcff509 100644
--- a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
+++ b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir
@@ -2,6 +2,7 @@
 
 #load_lo = #xegpu.layout<inst_data = [8, 16]>
 #prefetch_lo = #xegpu.layout<inst_data = [16, 16]>
+#load_lo1 = #xegpu.layout<inst_data = [32, 16]>
 gpu.module @test {
 
 // CHECK-LABEL:   func.func @load_nd_with_conflicting_tensor_desc
@@ -20,4 +21,61 @@ func.func @load_nd_with_conflicting_tensor_desc(%arg0: memref<64x64xf16>) -> vec
   xegpu.prefetch_nd %0 [%c0, %c0] {layout = #prefetch_lo} : !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
   return %1 : vector<16x16xf16>
 }
+
+// CHECK-LABEL:   func.func @multiple_tensor_desc_conflicts
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<32x16xf16, #xegpu.layout<inst_data = [8, 16]>>
+// CHECK-NEXT:      %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<32x16xf16, #xegpu.layout<inst_data = [16, 16]>>
+// CHECK-NEXT:      %[[T2:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<32x16xf16, #xegpu.layout<inst_data = [32, 16]>>
+// CHECK-NEXT:      %{{.*}} = xegpu.load_nd %[[T0]][%[[C0]], %[[C0]]] <{layout = #xegpu.layout<inst_data = [8, 16]>}> :
+// CHECK-SAME:        !xegpu.tensor_desc<32x16xf16, #xegpu.layout<inst_data = [8, 16]>> -> vector<32x16xf16>
+// CHECK-NEXT:      %{{.*}} = xegpu.load_nd %[[T2]][%[[C0]], %[[C0]]] <{layout = #xegpu.layout<inst_data = [32, 16]>}> :
+// CHECK-SAME:        !xegpu.tensor_desc<32x16xf16, #xegpu.layout<inst_data = [32, 16]>> -> vector<32x16xf16>
+// CHECK-NEXT:      xegpu.prefetch_nd %[[T1]][%[[C0]], %[[C0]]] <{layout = #xegpu.layout<inst_data = [16, 16]>}> :
+// CHECK-SAME:        !xegpu.tensor_desc<32x16xf16, #xegpu.layout<inst_data = [16, 16]>>
+func.func @multiple_tensor_desc_conflicts(%arg0: memref<64x64xf16>) -> vector<32x16xf16> {
+  %c0 = arith.constant 0 : index
+  %tdesc1 = xegpu.create_nd_tdesc %arg0 : memref<64x64xf16>
+    -> !xegpu.tensor_desc<32x16xf16, #load_lo>
+  %load1 = xegpu.load_nd %tdesc1 [%c0, %c0] {layout = #load_lo} : !xegpu.tensor_desc<32x16xf16, #load_lo>
+    -> vector<32x16xf16>
+  %load2 = xegpu.load_nd %tdesc1 [%c0, %c0] {layout = #load_lo1} : !xegpu.tensor_desc<32x16xf16, #load_lo>
+    -> vector<32x16xf16>
+  xegpu.prefetch_nd %tdesc1 [%c0, %c0] {layout = #prefetch_lo} : !xegpu.tensor_desc<32x16xf16, #load_lo>
+  %result = arith.addf %load1, %load2 : vector<32x16xf16>
+  return %result : vector<32x16xf16>
+}
+
+// CHECK-LABEL:   func.func @load_nd_with_conflicting_tensor_desc_in_loop
+// CHECK:           %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [16, 16]>>
+// CHECK-NEXT:      %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<64x64xf16>
+// CHECK-SAME:        -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [8, 16]>>
+// CHECK-NEXT:      %{{.*}}:2 = scf.for %{{.*}} = %{{.*}} iter_args(%{{.*}} = %{{.*}}, %{{.*}} = %[[T0]])
+// CHECK-SAME:        -> (vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [16, 16]>>) {
+// CHECK-NEXT:        %{{.*}} = xegpu.load_nd %[[T1]][%{{.*}}] <{layout = #xegpu.layout<inst_data = [8, 16]>}> :
+// CHECK-SAME:          !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [8, 16]>> -> vector<16x16xf16>
+// CHECK:             scf.yield %{{.*}}, %{{.*}} : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [16, 16]>>
+// CHECK:           xegpu.prefetch_nd %[[T0]][%{{.*}}] <{layout = #xegpu.layout<inst_data = [16, 16]>}> :
+// CHECK-SAME:        !xegpu.tensor_desc<16x16xf16, #xegpu.layout<inst_data = [16, 16]>>
+// CHECK-NEXT:      return %{{.*}}#0 : vector<16x16xf16>
+func.func @load_nd_with_conflicting_tensor_desc_in_loop(%arg0: memref<64x64xf16>) -> vector<16x16xf16> {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %cst = arith.constant dense<0.0> : vector<16x16xf16>
+  %0 = xegpu.create_nd_tdesc %arg0 : memref<64x64xf16>
+    -> !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+  %1:2 = scf.for %i = %c0 to %c4 step %c1 iter_args(%acc = %cst, %tdesc = %0) -> (vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #prefetch_lo>) {
+    %2 = xegpu.load_nd %tdesc [%c0, %c0] {layout = #load_lo} : !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+      -> vector<16x16xf16>
+    %3 = arith.addf %acc, %2 : vector<16x16xf16>
+    scf.yield %3, %tdesc : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+  }
+  xegpu.prefetch_nd %0 [%c0, %c0] {layout = #prefetch_lo} : !xegpu.tensor_desc<16x16xf16, #prefetch_lo>
+  return %1#0 : vector<16x16xf16>
+}
 }

>From e1c2d6c4a8652aeebcf0b6e1d57bf211d52ae1ca Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Mon, 26 Jan 2026 23:05:06 +0000
Subject: [PATCH 7/8] fix

---
 .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 39 +++++++++++--------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 878dc5486f24c..809dce09ff519 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -1461,24 +1461,22 @@ struct ResolveLayoutConflicts {
 } // namespace
 
 LogicalResult ResolveLayoutConflicts::run() {
-  // Helper function to get the tensor descriptor operand, or null if none found
-  // TODO: We assume only one tensor descriptor operand per op.
-  auto getTensorDescOperand = [](Operation *op) -> OpOperand * {
-    for (OpOperand &opnd : op->getOpOperands()) {
-      if (isa<xegpu::TensorDescType>(opnd.get().getType())) {
-        return &opnd;
-      }
-    }
-    return nullptr;
-  };
-
+  // Scan all operations in the parent operation and resolve layout conflicts at
+  // tensor descriptor and vector use points.
   auto r = parentOp->walk([&](Operation *op) -> WalkResult {
-    // Check if this op is an anchor op and at least one operand is a tensor
-    // descriptor type.
-    OpOperand *tdescOperand = getTensorDescOperand(op);
-    if (isa<xegpu::AnchorLayoutInterface>(op) && tdescOperand) {
-      auto res = resolveTensorDescConsumer(*tdescOperand);
-      return succeeded(res) ? WalkResult::advance() : WalkResult::interrupt();
+    for (OpOperand &operand : op->getOpOperands()) {
+      // Visit every operand; only a failed resolution interrupts the walk.
+      Type operandType = operand.get().getType();
+      if (isa<xegpu::AnchorLayoutInterface>(op) &&
+          isa<xegpu::TensorDescType>(operandType)) {
+        // Tensor descriptor operand of an anchor op.
+        if (failed(resolveTensorDescConsumer(operand)))
+          return WalkResult::interrupt();
+      } else if (isa<VectorType>(operandType) &&
+                 failed(resolveVectorConsumer(operand))) {
+        // Vector operand whose layout conflict could not be resolved.
+        return WalkResult::interrupt();
+      }
+    }
     return WalkResult::advance();
   });
@@ -1508,6 +1506,13 @@ static xegpu::CreateNdDescOp getDefiningCreateNdDescOp(Value tdescValue) {
   return nullptr;
 }
 
+LogicalResult
+ResolveLayoutConflicts::resolveVectorConsumer(OpOperand &operand) {
+  // TODO: Implement vector consumer layout conflict resolution (requires
+  // layout utilities). Until then this is a no-op that reports success.
+  return success();
+}
+
 LogicalResult
 ResolveLayoutConflicts::resolveTensorDescConsumer(OpOperand &operand) {
   Operation *consumerOp = operand.getOwner();

>From 416b3edf2eb4e4639ef0904354da1190f3f2b92b Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Tue, 27 Jan 2026 17:48:09 +0000
Subject: [PATCH 8/8] fix

---
 mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 809dce09ff519..7825d5e5a7923 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -1521,6 +1521,12 @@ ResolveLayoutConflicts::resolveTensorDescConsumer(OpOperand &operand) {
   auto currTDescType = dyn_cast<xegpu::TensorDescType>(tdescValue.getType());
   assert(anchorOp && currTDescType &&
          "Expected anchor layout op and tensor descriptor consumer.");
+  // TODO: Scattered tensor desc is not supported for now.
+  if (currTDescType.isScattered()) {
+    DBGS() << "Scattered tensor descriptor not supported: " << tdescValue
+           << "\n";
+    return failure();
+  }
   Attribute currLayout = currTDescType.getLayout();
   Attribute expectedLayout = anchorOp.getAnchorLayout();
   // A conflict exists in tensot descriptor operand if tensor descriptor's