[Mlir-commits] [mlir] 7856e98 - [mlir][XeGPU] Fix crash in getUArch when no chip target attribute is set (#183912)

Tue Mar 3 03:19:42 PST 2026

Author: Mehdi Amini
Date: 2026-03-03T12:19:38+01:00
New Revision: 7856e98768087e0381b2f700bed544324ecd0984

URL: https://github.com/llvm/llvm-project/commit/7856e98768087e0381b2f700bed544324ecd0984
DIFF: https://github.com/llvm/llvm-project/commit/7856e98768087e0381b2f700bed544324ecd0984.diff

LOG: [mlir][XeGPU] Fix crash in getUArch when no chip target attribute is set (#183912)

When running --xegpu-subgroup-distribute (or --xegpu-propagate-layout)
on a gpu.func that lacks a chip target attribute, getChipStr() returns
std::nullopt and the callers pass an empty string to getUArch(). This
function used llvm_unreachable for unrecognised architecture names,
causing an immediate abort instead of gracefully skipping the operation.

Fix by:
1. Changing getUArch() to return nullptr for unknown arch names instead
of calling llvm_unreachable.
2. Adding null-pointer guards to all callers in XeGPUPropagateLayout.cpp
that were missing them. The callers in XeGPUSubgroupDistribute.cpp
already had null checks but they were unreachable due to the unreachable
call.

Add a regression test that runs --xegpu-subgroup-distribute on a
gpu.func without any chip attribute and verifies it no longer crashes.

Fixes #181531
Fixes #179167

Added: 
    mlir/test/Dialect/XeGPU/xegpu-subgroup-distribute-no-arch.mlir

Modified: 
    mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h
    mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
    mlir/lib/Dialect/XeGPU/Transforms/XeGPUSgToWiDistributeExperimental.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h b/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h
index 721cb74823718..b36c6e3fee353 100644

--- a/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h
+++ b/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h
@@ -282,11 +282,8 @@ struct BMGuArch : public Xe2Plus {
 inline const uArch *getUArch(llvm::StringRef archName) {
   if (archName.equals_insensitive("pvc"))
     return PVCuArch::getInstance();
-  else if (archName.equals_insensitive("bmg"))
+  if (archName.equals_insensitive("bmg"))
     return BMGuArch::getInstance();
-  else
-    llvm_unreachable("No matching uArch found");
-
   return nullptr;
 }
 

diff  --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 1031e7e2a13e0..7f7e8d6ad7734 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -576,7 +576,9 @@ void LayoutInfoPropagation::visitPrefetchNdOp(
     // prefetch.
     auto tdescTy = prefetch.getTensorDescType();
 
-    const auto *uArch = getUArch(getChipStr(prefetch).value_or(""));
+    const uArch *uArch = getUArch(getChipStr(prefetch).value_or(""));
+    if (!uArch)
+      return;
     const auto *uArchInstruction =
         dyn_cast<xegpu::uArch::Subgroup2DBlockPrefetchInstruction>(
             uArch->getInstruction(
@@ -630,7 +632,9 @@ void LayoutInfoPropagation::visitVectorMultiReductionOp(
   VectorType sourceTy = reduction.getSourceVectorType();
   SmallVector<int64_t> reductionDims(reduction.getReductionDims());
 
-  const auto *uArch = getUArch(xegpu::getChipStr(reduction).value_or(""));
+  const uArch *uArch = getUArch(xegpu::getChipStr(reduction).value_or(""));
+  if (!uArch)
+    return;
   auto consumerLayoutAttr =
       dyn_cast<xegpu::DistributeLayoutAttr>(resLayoutInfo.get());
 
@@ -739,7 +743,9 @@ void LayoutInfoPropagation::visitDpasOp(
     dpasBLayout = LayoutInfo(anchorLayoutB);
     dpasCDLayout = LayoutInfo(anchorLayoutCD);
   } else {
-    const auto *uArch = getUArch(getChipStr(dpas).value_or(""));
+    const uArch *uArch = getUArch(getChipStr(dpas).value_or(""));
+    if (!uArch)
+      return;
     VectorType aTy = dpas.getLhsType();
     VectorType bTy = dpas.getRhsType();
     VectorType cdTy = dpas.getResultType();
@@ -795,7 +801,9 @@ void LayoutInfoPropagation::visitStoreNdOp(
   if (hasParamsOfLayoutKind(anchorLayout)) {
     storeLayout = LayoutInfo(anchorLayout);
   } else {
-    const auto *uArch = getUArch(getChipStr(store).value_or(""));
+    const uArch *uArch = getUArch(getChipStr(store).value_or(""));
+    if (!uArch)
+      return;
     const auto *uArchInstruction =
         dyn_cast<xegpu::uArch::Subgroup2DBlockStoreInstruction>(
             uArch->getInstruction(
@@ -924,7 +932,9 @@ void LayoutInfoPropagation::visitVectorBitcastOp(
 
   auto consumerLayoutAttr =
       dyn_cast<xegpu::DistributeLayoutAttr>(resLayoutInfo.get());
-  const auto *uArch = getUArch(xegpu::getChipStr(bitcast).value_or(""));
+  const uArch *uArch = getUArch(xegpu::getChipStr(bitcast).value_or(""));
+  if (!uArch)
+    return;
   auto requiredResLayoutAttr = setupBitCastResultLayout(
       layoutKind, srcVecType, resVecType, consumerLayoutAttr, uArch);
 
@@ -954,8 +964,10 @@ void LayoutInfoPropagation::visitInsertStridedSliceOp(
 
   auto consumerLayoutAttr =
       dyn_cast<xegpu::DistributeLayoutAttr>(resLayoutInfo.get());
-  const auto *uArch =
+  const uArch *uArch =
       getUArch(xegpu::getChipStr(insertStridedSlice).value_or(""));
+  if (!uArch)
+    return;
 
   auto requiredResLayoutAttr = xegpu::setupInsertStridedSliceResultLayout(
       layoutKind, srcVecType, resVecType, consumerLayoutAttr, uArch);
@@ -978,7 +990,9 @@ void LayoutInfoPropagation::visitLoadGatherOp(
     ArrayRef<const LayoutInfoLattice *> results) {
   xegpu::DistributeLayoutAttr requiredAnchorLayoutAttr;
   xegpu::DistributeLayoutAttr anchorLayoutAttr = load.getLayoutAttr();
-  const auto *uArch = getUArch(getChipStr(load).value_or(""));
+  const uArch *uArch = getUArch(getChipStr(load).value_or(""));
+  if (!uArch)
+    return;
   auto subgroupSize = uArch->getSubgroupSize();
   VectorType resVecTy = load.getValueType();
   int chunkSize = load.getChunkSize().value_or(1);
@@ -1037,7 +1051,9 @@ void LayoutInfoPropagation::visitCreateDescOp(
   // Need the layout of the descriptor to propagate to the operands.
   if (!descLayout.isAssigned())
     return;
-  const auto *uArch = getUArch(getChipStr(createDesc).value_or(""));
+  const uArch *uArch = getUArch(getChipStr(createDesc).value_or(""));
+  if (!uArch)
+    return;
   // For offset operand propagate 1D default layout.
   LayoutInfo layout = getDefaultSIMTLayoutInfo(createDesc->getContext(), 1,
                                                uArch->getSubgroupSize());
@@ -1052,7 +1068,9 @@ void LayoutInfoPropagation::visitStoreScatterOp(
 
   xegpu::DistributeLayoutAttr requiredAnchorLayoutAttr;
   xegpu::DistributeLayoutAttr anchorLayoutAttr = storeScatter.getLayoutAttr();
-  const auto *uArch = getUArch(getChipStr(storeScatter).value_or(""));
+  const uArch *uArch = getUArch(getChipStr(storeScatter).value_or(""));
+  if (!uArch)
+    return;
   auto subgroupSize = uArch->getSubgroupSize();
   VectorType srcVecTy = storeScatter.getValueType();
   int chunkSize = storeScatter.getChunkSize().value_or(1);
@@ -1117,7 +1135,9 @@ void LayoutInfoPropagation::visitLoadMatrixOp(
     VectorType resVecTy =
         llvm::cast<VectorType>(loadMatrixOp.getRes().getType());
     assert(resVecTy.getRank() == 2 && "Expecting 2D vector for store matrix.");
-    const auto *uArch = getUArch(getChipStr(loadMatrixOp).value_or(""));
+    const uArch *uArch = getUArch(getChipStr(loadMatrixOp).value_or(""));
+    if (!uArch)
+      return;
     auto requiredAnchorLayoutAttr = xegpu::setupLoadMatrixAnchorLayout(
         layoutKind, resVecTy, consumerLayoutAttr, uArch);
     loadMatrixOp.setLayoutAttr(requiredAnchorLayoutAttr);
@@ -1136,7 +1156,9 @@ void LayoutInfoPropagation::visitStoreMatrixOp(
     VectorType srcVecTy =
         llvm::cast<VectorType>(storeMatrix.getData().getType());
     assert(srcVecTy.getRank() == 2 && "Expecting 2D vector for store matrix.");
-    const auto *uArch = getUArch(getChipStr(storeMatrix).value_or(""));
+    const uArch *uArch = getUArch(getChipStr(storeMatrix).value_or(""));
+    if (!uArch)
+      return;
     auto requiredAnchorLayoutAttr =
         xegpu::setupStoreMatrixAnchorLayout(layoutKind, srcVecTy, uArch);
     storeMatrix.setLayoutAttr(requiredAnchorLayoutAttr);

diff  --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSgToWiDistributeExperimental.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSgToWiDistributeExperimental.cpp
index c1a35c93f9aa6..cffd80f8fcf92 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSgToWiDistributeExperimental.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSgToWiDistributeExperimental.cpp
@@ -522,7 +522,7 @@ struct SgToWiVectorReduction : public OpConversionPattern<vector::ReductionOp> {
 
     // Get the subgroup size from the layout.
     int64_t sgSize = layout.getEffectiveLaneLayoutAsInt()[0];
-    const auto *uArch = getUArch(xegpu::getChipStr(op).value_or(""));
+    const uArch *uArch = getUArch(xegpu::getChipStr(op).value_or(""));
     if (!uArch)
       return rewriter.notifyMatchFailure(
           op, "xegpu::ReductionOp require target attribute attached to "

diff  --git a/mlir/test/Dialect/XeGPU/xegpu-subgroup-distribute-no-arch.mlir b/mlir/test/Dialect/XeGPU/xegpu-subgroup-distribute-no-arch.mlir
new file mode 100644
index 0000000000000..c3fdd9c90ffd5
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/xegpu-subgroup-distribute-no-arch.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-opt --xegpu-subgroup-distribute -split-input-file %s | FileCheck %s
+// Regression test for https://github.com/llvm/llvm-project/issues/181531:
+// Running --xegpu-subgroup-distribute without a chip target attribute used to
+// call llvm_unreachable in getUArch(). The pass should now bail out gracefully.
+
+// CHECK-LABEL: gpu.func @no_crash_without_chip_attr
+// CHECK:       gpu.return
+gpu.module @test_module {
+  gpu.func @no_crash_without_chip_attr(%arg0: memref<8x16xf16>, %arg1: memref<8x16xf16>) {
+    gpu.return
+  }
+}