[Mlir-commits] [mlir] [MLIR][XeGPU] Remove create tdesc op from xegpu dialect (PR #182804)

Sun Feb 22 21:21:50 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Nishant Patel (nbpatel)

<details>
<summary>Changes</summary>



---

Patch is 93.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182804.diff


11 Files Affected:

- (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td (+13-164) 
- (modified) mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp (-53) 
- (modified) mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp (+1-1) 
- (modified) mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp (-23) 
- (modified) mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp (+1-69) 
- (modified) mlir/test/Dialect/XeGPU/invalid.mlir (-337) 
- (modified) mlir/test/Dialect/XeGPU/ops.mlir (+1-287) 
- (modified) mlir/test/Dialect/XeGPU/propagate-layout.mlir (+2-34) 
- (modified) mlir/test/Dialect/XeGPU/xegpu-blocking.mlir (-149) 
- (modified) mlir/test/Dialect/XeGPU/xegpu-unroll-patterns.mlir (-201) 
- (modified) mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp (+1-3) 


``````````diff

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 6d21aa9295716..7aff11d56a82a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -648,107 +648,6 @@ def XeGPU_UpdateNdOffsetOp : XeGPU_Op<"update_nd_offset",
   let hasVerifier = 1;
 }
 
-def XeGPU_CreateDescOp: XeGPU_Op<"create_tdesc", [Pure, ViewLikeOpInterface]> {
-  let summary = "create scattered tensor descriptors (TensorDesc).";
-  let description = [{
-    "create_tdesc" is similar to "create_nd_tdesc" in terms that it creates
-    a Tensor Descriptor (TensorDescType) for a memory region. While "create_nd_tdesc"
-    is for creating continuous subviews, "create_tdesc" is for creating non-continuous
-    (scattered) subviews, allowing each lane in a subgroup specifying their own offset.
-    It accepts the following parameters:
-
-    Arguments:
-
-    - `source`: a 1D memref or pointer (i64, i32, ui64, ui32) represents the flattened
-      memory object.
-
-    - `offsets`: a vector containing offsets of each access point. Its size
-      is fixed to the hardware supportted subgroup size, e.g., 16 on PVC,
-      implying each element in the vector corresponds to a SIMT lane in the subgroup.
-
-    Results:
-    - `res`: scattered tensor descriptor
-
-    The first dimension of the result TensorDesc corresponds to lanes, so it should
-    match the dimension of offsets. It may also has a second dimension corresponding to
-    the chunk_size if the chunk size is larger than 1.
-
-    Example 1: It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64]
-    ```mlir
-    %a = memref.alloc() : memref<1024xf32>
-    %0 = arith.constant dense<[0, 16, 32, 64]> : vector<4xindex>
-    %1 = xegpu.create_tdesc %a, %0: memref<1024xf32>, vector<4xindex> -> TensorDesc<4xf32>
-    ```
-
-    Example 2: It assumes subgroup size is 4, and each workitem access 8 elements.
-               It will access totally 32 data elements: a[0:7], a[16:23], a[32:39], a[64:71]
-    ```mlir
-    %0 = memref.alloc() : memref<1024xf32>
-    %off = arith.constant dense<[0, 16, 32, 64]> : vector<4xindex>
-    %1 = xegpu.create_tdesc %0, %off : memref<1024xf32>, vector<4xindex>
-          -> TensorDesc<4x8xf32, #xegpu.scattered_tdesc_attr<chunk_size = 8>>
-    ```
-
-    Example 3: It is similar to Example 2, but there is some overlaps among workitems.
-               It accesses: a[0:7], a[4:11], a[8:15], a[12:19]
-    ```mlir
-    %0 = memref.alloc() : memref<1024xf32>
-    %off = arith.constant dense<[0, 4, 8, 12]> : vector<4xindex>
-    %1 = xegpu.create_tdesc %0, %off : memref<1024xf32>, vector<4xindex>
-          -> TensorDesc<4x8xf32, #xegpu.scattered_tdesc_attr<chunk_size = 8>>
-    ```
-  }];
-
-  let arguments = (ins XeGPU_GatherScatterBaseAddrType:$source,
-      XeGPU_OffsetType:$offsets);
-  let results = (outs XeGPU_TensorDesc:$TensorDesc);
-
-  let builders = [
-    OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "mlir::Value": $source,
-                   "llvm::ArrayRef<OpFoldResult>": $offsets)>,
-    OpBuilder<(ins "xegpu::TensorDescType": $TensorDesc, "mlir::Value": $source,
-                   "llvm::ArrayRef<int64_t>": $offsets)>,
-  ];
-
-  let assemblyFormat = [{
-    $source `,` $offsets attr-dict `:`  type($source) `,` type($offsets) `->` qualified(type($TensorDesc))
-  }];
-
-  let extraClassDeclaration = [{
-    xegpu::TensorDescType getTensorDescType() {
-      return getTensorDesc().getType();
-    }
-
-    mlir::VectorType getOffsetsType() {
-      return getOffsets().getType();
-    }
-
-    size_t getNumOffsets() {
-      return getOffsetsType().getNumElements();
-    }
-
-    mlir::Value getViewSource() { return getSource(); }
-
-    unsigned getSourceMemorySpace() {
-      auto srcTy = getSource().getType();
-      if (auto memrefTy = llvm::dyn_cast<mlir::MemRefType>(srcTy)) {
-        auto attr = memrefTy.getMemorySpace();
-        if (attr) {
-          if (auto intAttr = llvm::dyn_cast<mlir::IntegerAttr>(attr))
-            return static_cast<unsigned>(intAttr.getInt());
-          if (auto memSpaceAttr = llvm::dyn_cast<MemorySpaceAttr>(attr))
-            return static_cast<unsigned>(memSpaceAttr.getValue());
-        }
-      }
-      // take global as default memory scope.
-      return static_cast<unsigned>(MemorySpace::Global);
-    }
-
-  }];
-
-  let hasVerifier = 1;
-}
-
 def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", [AnchorLayoutInterface]> {
   let summary = "prefetches a set of scattered data points to cache";
 
@@ -764,11 +663,9 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", [AnchorLayoutInterface]> {
     Arguments:
 
     - `source`: represents the memory region to be loaded from, which can be either a
-        tensor_desc or a 1D memref or pointer (ui64, ui32, i64 or i32).
-        In case of tensor_desc, offsets come from the producer create_tdesc op.
-        tensor_desc cannot be used at lane level.
+        1D memref or pointer (ui64, ui32, i64 or i32).
 
-    - `offsets`: represents offsets from source. required if `source` in not a TensorDescType.
+    - `offsets`: represents offsets from source.
         offsets is a vector of `index` type and vector length is either the subgroup size
         or 1 at lane level. scalar offset is also valid for lane level.
 
@@ -794,7 +691,6 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", [AnchorLayoutInterface]> {
     A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc.
     It combines "create scattered TensorTdesc" and "prefetch with scattered TensorTdesc".
     The source operand could be a raw pointer (ui64, ui32, i64, i32).
-    Please refer to create_tdesc for the restriction of memref.
     ```mlir
       %a = memref.alloc() : memref<1024xf32>
       %0 = arith.constant dense<[0, 16, 32, 64]> : vector<4xindex>
@@ -896,11 +792,9 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>, AnchorLayou
     Arguments:
 
     - `source`: represents the memory region to be loaded from, which can be either a
-        tensor_desc or a 1D memref or pointer (ui64, ui32, i64 or i32).
-        In case of tensor_desc, offsets come from the producer create_tdesc op.
-        tensor_desc cannot be used at lane level.
+        1D memref or pointer (ui64, ui32, i64 or i32).
 
-    - `offsets`: represents offsets from source. required if `source` in not a TensorDescType.
+    - `offsets`: represents offsets from source.
         offsets is a vector of `index` type and vector length is either the subgroup size
         or 1 at lane level. scalar offset is also valid for lane level.
 
@@ -918,32 +812,9 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>, AnchorLayou
     Results:
     - `res`: represents loaded data
 
-
-  Example 1 (Workgroup level):
-  ```mlir
-    %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>,
-                             l2_hint = #xegpu.cache_hint<uncached>,
-                             l3_hint = #xegpu.cache_hint<uncached>},
-                             layout = #xegpu.layout<sg_layout = [8], sg_data = [32]>>
-          : !xegpu.tensor_desc<256xf32, #xegpu.scatter_tdesc_attr<memory_space=global>>,
-            vector<256xi1> -> vector<256xf32>
-  ```
-
-  Example 2 (Subgroup level):
-  ```mlir
-    %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>,
-                             l2_hint = #xegpu.cache_hint<uncached>,
-                             l3_hint = #xegpu.cache_hint<uncached>},
-                             layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 8]>>
-          : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>,
-            vector<16xi1> -> vector<16x8xf32>
-  ```
-
-  Example 3 (Subgroup level):
-  A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc.
-  It combines "create scattered TensorTdesc" and "load with scattered TensorTdesc".
-  The source operand could be a raw pointer (ui64, ui32, i64, i32). Please refer to create_tdesc
-  for the restriction of memref.
+  Example 1 (Subgroup level):
+  A variant accepts memref as base pointer or the source operand
+  could be a raw pointer (ui64, ui32, i64, i32).
   ```mlir
     %a = memref.alloc() : memref<1024xf32>
     %offsets = vector.step : vector<16xindex>
@@ -955,7 +826,7 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [MemoryEffects<[MemRead]>, AnchorLayou
       : memref<1024xf32>, vector<16xi1>, vector<16xindex> -> vector<16xf32>
   ```
 
-  Example 4 (lane level):
+  Example 2 (lane level):
   lane level only accepts the offsets variant. chunk_size can be inferred from result
   type. In this example, chunk_size is 8.
   ```mlir
@@ -1067,11 +938,9 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>, AnchorL
     - `value`: represents the data to be stored.
 
     - `dest`: represents the memory region to be stored to, which can be either a
-        tensor_desc or a 1D memref or pointer (ui64, ui32, i64 or i32).
-        In case of tensor_desc, offsets come from the producer create_tdesc op.
-        tensor_desc cannot be used at lane level.
+        1D memref or pointer (ui64, ui32, i64 or i32).
 
-    - `offsets`: represents offsets from dest. required if `source` in not a TensorDescType.
+    - `offsets`: represents offsets from dest.
         offsets is a vector of `index` type and vector length is either the subgroup size
         or 1 at lane level. scalar offset is also valid for lane level.
 
@@ -1087,29 +956,9 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>, AnchorL
       to be stored. Only valid at workgroup and subgroup levels.
 
 
-  Example 1 (Workgroup level):
-  ```mlir
-    xegpu.store %0, %1, %2 <{l1_hint = #xegpu.cache_hint<uncached>,
-                             l2_hint = #xegpu.cache_hint<write_back>,
-                             l3_hint = #xegpu.cache_hint<write_through>,
-                             layout = #xegpu.layout<sg_layout = [8], sg_data = [16]>}>
-          : vector<256xf32>, !xegpu.tensor_desc<256xf32, #xegpu.scattered_tdesc_attr<>>, vector<256xi1>
-  ```
-
-  Example 2 (Subgroup level):
-  ```mlir
-    xegpu.store %0, %1, %2 <{l1_hint = #xegpu.cache_hint<uncached>,
-                             l2_hint = #xegpu.cache_hint<write_back>,
-                             l3_hint = #xegpu.cache_hint<write_through>,
-                             layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 8]>}>
-          : vector<16x8xf32>, !xegpu.tensor_desc<16x8xf32, #xegpu.scattered_tdesc_attr<chunk_size=8>>, vector<16xi1>
-  ```
-
-  Example 3 (Subgroup level):
-  A variant accepts memref as base pointer and an offset instead of scattered TensorTdesc.
-  It combines "create scattered TensorTdesc" and "store with scattered TensorTdesc".
+  Example 1 (Subgroup level):
+  A variant accepts memref as base pointer and an offset.
   The dest operand could be a raw pointer (uint64_t).
-  Please refer to create_tdesc for the restriction of memref.
   ```mlir
     %a = memref.alloc() : memref<1024xf32>
     %val = arith.constant dense<0.0> : vector<16xf32>
@@ -1122,7 +971,7 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", [MemoryEffects<[MemWrite]>, AnchorL
       : vector<16xf32>, memref<1024xf32>, vector<16xi1>, vector<16xindex>
   ```
 
-  Example 4 (Lane level):
+  Example 2 (Lane level):
   Lane level IR only accepts the offsets variant. chunk_size can be inferred from value
   type. In this example, chunk_size is 8.
   ```mlir
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 3aba0f5070764..29e8419173aa2 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -739,59 +739,6 @@ LogicalResult UpdateNdOffsetOp::verify() {
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// XeGPU_CreateDescOp
-//===----------------------------------------------------------------------===//
-
-void CreateDescOp::build(OpBuilder &builder, OperationState &state,
-                         TensorDescType TensorDesc, Value source,
-                         llvm::ArrayRef<OpFoldResult> offsets) {
-  auto loc = source.getLoc();
-  int64_t size = static_cast<int64_t>(offsets.size());
-  auto type = VectorType::get(size, builder.getIndexType());
-  auto values = getValueOrCreateConstantIndexOp(builder, loc, offsets);
-  auto offset = vector::FromElementsOp::create(builder, loc, type, values);
-  build(builder, state, TensorDesc, source, offset);
-}
-
-void CreateDescOp::build(OpBuilder &builder, OperationState &state,
-                         TensorDescType TensorDesc, Value source,
-                         llvm::ArrayRef<int64_t> offsets) {
-  auto ofrs = getAsIndexOpFoldResult(builder.getContext(), offsets);
-  build(builder, state, TensorDesc, source, ofrs);
-}
-
-LogicalResult CreateDescOp::verify() {
-  auto tdescTy = getTensorDescType();
-
-  if (!tdescTy.isScattered())
-    return emitOpError("Expects a scattered TensorDesc.\n");
-
-  // Memory space of created TensorDesc should match with the source.
-  // Both source and TensorDesc are considered for global memory by default,
-  // if the memory scope attr is not specified. If source is an integer,
-  // it is considered as ptr to global memory.
-  auto srcMemorySpace = getSourceMemorySpace();
-  auto tdescMemorySpace = static_cast<unsigned>(tdescTy.getMemorySpace());
-  if (srcMemorySpace != tdescMemorySpace)
-    return emitOpError("Memory space mismatch.")
-           << " Source: " << srcMemorySpace
-           << ", TensorDesc: " << tdescMemorySpace;
-
-  // check total size
-  auto chunkSize = tdescTy.getChunkSizeAsInt();
-  SmallVector<int64_t> shape(getOffsetsType().getShape());
-  if (chunkSize != 1)
-    shape.push_back(chunkSize);
-
-  auto tdescShape = getShapeOf(tdescTy);
-  if (shape != tdescShape)
-    return emitOpError("Incorrect TensorDesc shape. ")
-           << "Expected is " << makeString(shape) << "\n";
-
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // XeGPU_PrefetchOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index 49b66d2a8f6f6..cb269330cff46 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -160,7 +160,7 @@ XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
 
 std::optional<SmallVector<int64_t>>
 XeGPUBlockingPass::getTileShape(Operation *op) const {
-  if (isa<xegpu::CreateNdDescOp, xegpu::UpdateNdOffsetOp, xegpu::CreateDescOp,
+  if (isa<xegpu::CreateNdDescOp, xegpu::UpdateNdOffsetOp,
           xegpu::UpdateOffsetOp, xegpu::LoadMatrixOp>(op))
     return getTileShape(op->getOpResult(0));
   if (isa<xegpu::PrefetchNdOp, xegpu::LoadNdOp, xegpu::PrefetchOp,
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index bc309c9029878..dafeb04157439 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -348,10 +348,6 @@ class LayoutInfoPropagation
                             ArrayRef<LayoutInfoLattice *> operands,
                             ArrayRef<const LayoutInfoLattice *> results);
 
-  void visitCreateDescOp(xegpu::CreateDescOp createDesc,
-                         ArrayRef<LayoutInfoLattice *> operands,
-                         ArrayRef<const LayoutInfoLattice *> results);
-
   void visitUpdateNdOffsetOp(xegpu::UpdateNdOffsetOp updateNdOffset,
                              ArrayRef<LayoutInfoLattice *> operands,
                              ArrayRef<const LayoutInfoLattice *> results);
@@ -442,9 +438,6 @@ LogicalResult LayoutInfoPropagation::visitOperation(
       .Case([&](xegpu::LoadGatherOp loadGatherOp) {
         visitLoadGatherOp(loadGatherOp, operands, results);
       })
-      .Case([&](xegpu::CreateDescOp createDescOp) {
-        visitCreateDescOp(createDescOp, operands, results);
-      })
       .Case([&](xegpu::UpdateNdOffsetOp updateNdOffsetOp) {
         visitUpdateNdOffsetOp(updateNdOffsetOp, operands, results);
       })
@@ -1027,22 +1020,6 @@ void LayoutInfoPropagation::visitLoadGatherOp(
     propagateIfChanged(operands[2], operands[2]->meet(maskLayoutInfo));
 }
 
-/// Propagate the layout of the descriptor to the vector offset operand in
-/// CreateDescOp.
-void LayoutInfoPropagation::visitCreateDescOp(
-    xegpu::CreateDescOp createDesc, ArrayRef<LayoutInfoLattice *> operands,
-    ArrayRef<const LayoutInfoLattice *> results) {
-  LayoutInfo descLayout = results[0]->getValue();
-  // Need the layout of the descriptor to propagate to the operands.
-  if (!descLayout.isAssigned())
-    return;
-  auto uArch = getUArch(getChipStr(createDesc).value_or(""));
-  // For offset operand propagate 1D default layout.
-  LayoutInfo layout = getDefaultSIMTLayoutInfo(createDesc->getContext(), 1,
-                                               uArch->getSubgroupSize());
-  propagateIfChanged(operands[1], operands[1]->meet(layout));
-}
-
 /// Set the layout for the value, tensor descriptor, offset and mask operands in
 /// the StoreScatterOp.
 void LayoutInfoPropagation::visitStoreScatterOp(
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index 2b1bd4d73a576..d49d797581a5e 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -477,74 +477,6 @@ struct UnrollDpasOp : public UnrollPattern<xegpu::DpasOp> {
   }
 };
 
-struct UnrollCreateDescOp : public UnrollPattern<xegpu::CreateDescOp> {
-  using UnrollPattern<xegpu::CreateDescOp>::UnrollPattern;
-  LogicalResult matchAndRewrite(xegpu::CreateDescOp op,
-                                PatternRewriter &rewriter) const override {
-    Location loc = op.getLoc();
-    xegpu::TensorDescType tdescTy = op.getType();
-    TypedValue<::mlir::VectorType> indiceVec = op.getOffsets();
-    VectorType indiceVecTy = indiceVec.getType();
-
-    if (!tdescTy.isScattered())
-      return failure();
-
-    std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
-    if (!targetShape)
-      return failure();
-
-    SmallVector<int64_t> targetIndiceShape(*targetShape);
-    int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
-    // IndiceVec is 1 dim lower than tdescTy when chunkSize is larger than 1.
-    if (originalChunkSize > 1)
-      targetIndiceShape.pop_back();
-
-    auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
-    SmallVector<Type> convertedIndiceTypes =
-        getUnrolledTypes(indiceVecTy, targetIndiceShape);
-    SmallVector<Value> convertedIndiceVec =
-        pack(indiceVec, convertedIndiceTypes, targetIndiceShape, loc, rewriter);
-
-    SmallVector<Value> newOps;
-
-    // More indices is need when chunkSize > 1. Since a big load from one
-    // address could be break into multiple small loads.
-    if (originalChunkSize > 1) {
-      int64_t blockedChunkSize = targetShape->back();
-      int64_t numNewChunks = originalChunkSize / blockedChunkSize;
-
-      for (auto [indice, indiceType] :
-           llvm::zip(convertedIndiceVec, convertedIndiceTypes)) {
-        for (int64_t i = 0; i < numNewChunks; ++i) {
-          // Compute the offset
-          Value inc = arith::ConstantIndexOp::create(rewriter, loc,
-                                                     i * blockedChunkSize);
-          Value incVec =
-              vector::BroadcastOp::create(rewriter, loc, indiceType, inc);
-          Value offsetIndice =
-              arith::AddIOp::create(rewriter, loc, indice, incVec);
-
-          auto newOp = xegpu::CreateDescOp::create(
-              rewriter, loc, newTdescTy, op.getSource(), offsetIndice);
-
-          newOps.push_back(newOp);
-        }
-      }
-    } else {
-      for (auto indice : convertedIndiceVec) {
-        auto newOp = xegpu::CreateDescOp::create(rewriter, loc, newTdescTy,
-                                                 op.getSource(), indice);
-      ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/182804