[Mlir-commits] [mlir] [MLIR][XeGPU] make offsets optional for create_nd_tdesc (PR #148335)

Jianhui Li llvmlistbot at llvm.org
Thu Jul 17 12:07:02 PDT 2025


https://github.com/Jianhui-Li updated https://github.com/llvm/llvm-project/pull/148335

>From b9a6d984765445fd17f257f936fe61a1cc94dab1 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 1 Jul 2025 23:00:42 +0000
Subject: [PATCH 01/23] init code

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td |  41 ++-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 261 +++++++++++++++++-
 mlir/test/Dialect/XeGPU/ops.mlir              |  12 +-
 .../Dialect/XeGPU/subgroup-distribute.mlir    |   8 +-
 4 files changed, 306 insertions(+), 16 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index daab65ec893b8..018c187f642d6 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -110,23 +110,36 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     Variadic<Index>: $offsets,
     Variadic<Index>: $shape,
     Variadic<Index>: $strides,
-    DenseI64ArrayAttr: $const_offsets,
+    OptionalAttr<DenseI64ArrayAttr>: $const_offsets,
     OptionalAttr<DenseI64ArrayAttr>: $const_shape,
     OptionalAttr<DenseI64ArrayAttr>: $const_strides
   );
   let results = (outs XeGPU_TensorDesc: $TensorDesc);
 
-  let assemblyFormat = [{
-    $source ``
-    custom<DynamicIndexList>($offsets, $const_offsets)
-    (`,` custom<DynamicIndexList>($shape, $const_shape)^
-     `,` custom<DynamicIndexList>($strides, $const_strides))?
-    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
-  }];
+
+//  let assemblyFormat = [{
+//    $source
+//    (custom<DynamicIndexList>($offsets, $const_offsets)^)?
+//    (`base_shape` `:` custom<DynamicIndexList>($shape, $const_shape)^
+//     `base_strides` `:` custom<DynamicIndexList>($strides, $const_strides))?
+//    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
+//  }];
 
   let hasVerifier = 1;
 
+  let hasCustomAssemblyFormat = 1;
+
   let builders = [
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source)>,
+
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType> ": $source,
+                   "llvm::ArrayRef<OpFoldResult>": $shape,
+                   "llvm::ArrayRef<OpFoldResult>": $strides)>,
+
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source,
+                   "llvm::ArrayRef<OpFoldResult>": $shape,
+                   "llvm::ArrayRef<OpFoldResult>": $strides)>,
+
     OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source,
                    "llvm::ArrayRef<OpFoldResult>": $offsets)>,
 
@@ -163,9 +176,19 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     }
 
     ArrayRef<int64_t> getStaticOffsets(){
-      return getConstOffsets();
+      auto attr = getConstOffsetsAttr();
+      if (llvm::isa<IntegerType>(getSourceType()) || attr)
+        return attr;
+
+      // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present. So it is set to be MAX to indicate user not passed any value (kDynamic means offsets passed as variable).
+      setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), std::numeric_limits<int64_t>::max()));
+      // setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), 0));
+      //setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), mlir::ShapedType::kDynamic));
+      attr = getConstOffsetsAttr();
+      return attr;
     }
 
+
     /// wrapper for matching with OffsetSizeAndStrideOpInterface
     /// If source is IntegerType or `const_shape` is filled,
     /// it will return `const_shape`, such that mixes of `shape`
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2793c7a35bc97..13ef77bb4f970 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -122,7 +122,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
   build(builder, state, tdesc, source, dynamicOffsets /* dynamic offsets */,
         ValueRange({}) /* empty dynamic shape */,
         ValueRange({}) /* empty dynamic strides */,
-        staticOffsets /* const offsets */, {} /* empty const shape*/,
+        builder.getDenseI64ArrayAttr(staticOffsets)  /* const offsets */, 
+        {} /* empty const shape*/,
         {} /* empty const strides*/);
 }
 
@@ -220,6 +221,263 @@ LogicalResult CreateNdDescOp::verify() {
   return success();
 }
 
+
+ParseResult parseOptionalDynamicIndexList(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
+    DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
+    SmallVectorImpl<Type> *valueTypes = nullptr,
+    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square);
+inline ParseResult parseOptionalDynamicIndexList(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
+    DenseI64ArrayAttr &integers, SmallVectorImpl<Type> *valueTypes = nullptr,
+    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
+  DenseBoolArrayAttr scalableFlags;
+  return parseOptionalDynamicIndexList(parser, values, integers, scalableFlags,
+                               valueTypes, delimiter);
+}
+
+ParseResult parseOptionalDynamicIndexList(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
+    DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
+    SmallVectorImpl<Type> *valueTypes, AsmParser::Delimiter delimiter) {
+
+  SmallVector<int64_t, 4> integerVals;
+  SmallVector<bool, 4> scalableVals;
+  auto parseIntegerOrValue = [&]() {
+    OpAsmParser::UnresolvedOperand operand;
+    auto res = parser.parseOptionalOperand(operand);
+
+    // When encountering `[`, assume that this is a scalable index.
+    scalableVals.push_back(parser.parseOptionalLSquare().succeeded());
+
+    if (res.has_value() && succeeded(res.value())) {
+      values.push_back(operand);
+      integerVals.push_back(ShapedType::kDynamic);
+      if (valueTypes && parser.parseColonType(valueTypes->emplace_back()))
+        return failure();
+    } else {
+      int64_t integer;
+      if (failed(parser.parseInteger(integer)))
+        return failure();
+      integerVals.push_back(integer);
+    }
+
+    // If this is assumed to be a scalable index, verify that there's a closing
+    // `]`.
+    if (scalableVals.back() && parser.parseOptionalRSquare().failed())
+      return failure();
+    return success();
+  };
+  if (parser.parseOptionalLSquare().succeeded()) {
+    if ( parser.parseCommaSeparatedList(parseIntegerOrValue) || parser.parseRSquare() )
+      return parser.emitError(parser.getNameLoc())
+            << "expected SSA value or integer";
+    integers = parser.getBuilder().getDenseI64ArrayAttr(integerVals);
+    scalableFlags = parser.getBuilder().getDenseBoolArrayAttr(scalableVals);
+    return success();
+  }
+  return success();
+}
+
+
+::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::OperationState &result) {
+  ::mlir::OpAsmParser::UnresolvedOperand sourceRawOperand{};
+  ::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> sourceOperands(&sourceRawOperand, 1);  ::llvm::SMLoc sourceOperandsLoc;
+  (void)sourceOperandsLoc;
+  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> offsetsOperands;
+  ::llvm::SMLoc offsetsOperandsLoc;
+  (void)offsetsOperandsLoc;
+  ::mlir::DenseI64ArrayAttr const_offsetsAttr;
+  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> shapeOperands;
+  ::llvm::SMLoc shapeOperandsLoc;
+  (void)shapeOperandsLoc;
+  ::mlir::DenseI64ArrayAttr const_shapeAttr;
+  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> stridesOperands;
+  ::llvm::SMLoc stridesOperandsLoc;
+  (void)stridesOperandsLoc;
+  ::mlir::DenseI64ArrayAttr const_stridesAttr;
+  ::mlir::Type sourceRawType{};
+  ::llvm::ArrayRef<::mlir::Type> sourceTypes(&sourceRawType, 1);
+  ::mlir::Type TensorDescRawType{};
+  ::llvm::ArrayRef<::mlir::Type> TensorDescTypes(&TensorDescRawType, 1);
+
+  sourceOperandsLoc = parser.getCurrentLocation();
+  if (parser.parseOperand(sourceRawOperand))
+    return ::mlir::failure();
+
+      auto optionalOffsetResult = [&]() -> ::mlir::OptionalParseResult {
+      {
+        // skip the "offsets :" at the begining if it exists 
+        if (::mlir::succeeded(parser.parseOptionalKeyword("offsets"))) {
+          if (parser.parseColon())
+         return ::mlir::failure();
+        }
+        offsetsOperandsLoc = parser.getCurrentLocation();
+        auto odsResult = parseOptionalDynamicIndexList(parser, offsetsOperands, const_offsetsAttr);
+        // Debug print for offsets parsing using LLVM_DEBUG
+        LLVM_DEBUG(llvm::dbgs() << "parseOptionalDynamicIndexList returned: " << (odsResult ? "failure" : "success") << "\n");
+        LLVM_DEBUG(llvm::dbgs() << "offsetsOperands size: " << offsetsOperands.size() << "\n");
+        if (const_offsetsAttr)
+          LLVM_DEBUG(llvm::dbgs() << "const_offsetsAttr: " << const_offsetsAttr << "\n");
+        if (const_offsetsAttr)
+          result.getOrAddProperties<CreateNdDescOp::Properties>().const_offsets = const_offsetsAttr;
+      }
+        return ::mlir::success();
+      }();
+
+      if (optionalOffsetResult.has_value() && ::mlir::failed(*optionalOffsetResult)) {
+        LLVM_DEBUG(llvm::dbgs() << "optionalOffsetResult failed\n");
+        return ::mlir::failure();
+      } 
+
+  if (::mlir::succeeded(parser.parseOptionalKeyword("shape"))) {
+    LLVM_DEBUG(llvm::dbgs() << "Parsing 'shape' keyword\n");
+    if (parser.parseColon())
+      return ::mlir::failure();
+    {
+      shapeOperandsLoc = parser.getCurrentLocation();
+      auto odsResult = parseDynamicIndexList(parser, shapeOperands, const_shapeAttr);
+      LLVM_DEBUG(llvm::dbgs() << "parseDynamicIndexList for shape returned: " << (odsResult ? "failure" : "success") << "\n");
+      LLVM_DEBUG(llvm::dbgs() << "shapeOperands size: " << shapeOperands.size() << "\n");
+      if (const_shapeAttr)
+        LLVM_DEBUG(llvm::dbgs() << "const_shapeAttr: " << const_shapeAttr << "\n");
+      if (odsResult) return ::mlir::failure();
+      if (const_shapeAttr)
+        result.getOrAddProperties<CreateNdDescOp::Properties>().const_shape = const_shapeAttr;
+    }
+
+    if (parser.parseKeyword("strides"))
+      return ::mlir::failure();
+    if (parser.parseColon())
+      return ::mlir::failure();
+    {
+      stridesOperandsLoc = parser.getCurrentLocation();
+      auto odsResult = parseDynamicIndexList(parser, stridesOperands, const_stridesAttr);
+      LLVM_DEBUG(llvm::dbgs() << "parseDynamicIndexList for strides returned: " << (odsResult ? "failure" : "success") << "\n");
+      LLVM_DEBUG(llvm::dbgs() << "stridesOperands size: " << stridesOperands.size() << "\n");
+      if (const_stridesAttr)
+        LLVM_DEBUG(llvm::dbgs() << "const_stridesAttr: " << const_stridesAttr << "\n");
+      if (odsResult) return ::mlir::failure();
+      if (const_stridesAttr)
+        result.getOrAddProperties<CreateNdDescOp::Properties>().const_strides = const_stridesAttr;
+    }
+  }
+  {
+    auto loc = parser.getCurrentLocation();(void)loc;
+    if (parser.parseOptionalAttrDict(result.attributes))
+      return ::mlir::failure();
+    if (failed(verifyInherentAttrs(result.name, result.attributes, [&]() {
+        return parser.emitError(loc) << "'" << result.name.getStringRef() << "' op ";
+      })))
+      return ::mlir::failure();
+  }
+  if (parser.parseColon())
+    return ::mlir::failure();
+
+  {
+    ::mlir::Type type;
+    if (parser.parseCustomTypeWithFallback(type))
+      return ::mlir::failure();
+    sourceRawType = type;
+  }
+  if (parser.parseArrow())
+    return ::mlir::failure();
+
+  if (parser.parseType(TensorDescRawType))
+    return ::mlir::failure();
+
+  ::llvm::copy(::llvm::ArrayRef<int32_t>({1, static_cast<int32_t>(offsetsOperands.size()), static_cast<int32_t>(shapeOperands.size()), static_cast<int32_t>(stridesOperands.size())}), result.getOrAddProperties<CreateNdDescOp::Properties>().operandSegmentSizes.begin());
+
+  ::mlir::Type odsBuildableType0 = parser.getBuilder().getIndexType();
+  result.addTypes(TensorDescTypes);
+
+  if (parser.resolveOperands(sourceOperands, sourceTypes, sourceOperandsLoc, result.operands))
+    return ::mlir::failure();
+
+  if (parser.resolveOperands(offsetsOperands, odsBuildableType0, offsetsOperandsLoc, result.operands))
+    return ::mlir::failure();
+
+  if (parser.resolveOperands(shapeOperands, odsBuildableType0, shapeOperandsLoc, result.operands))
+    return ::mlir::failure();
+
+  if (parser.resolveOperands(stridesOperands, odsBuildableType0, stridesOperandsLoc, result.operands))
+    return ::mlir::failure();
+  return ::mlir::success();
+}
+
+void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
+  _odsPrinter << ' ';
+  _odsPrinter << getSource();
+  // Print offsets if getConstOffsetsAttr() exists, is not empty, and its first value is not int64_t::max.
+  auto constOffsetsAttr = getConstOffsetsAttr();
+  bool printOffsets = false;
+  if (constOffsetsAttr && constOffsetsAttr.size() > 0) {
+    auto firstVal = constOffsetsAttr.asArrayRef()[0];
+    if (firstVal != std::numeric_limits<int64_t>::max()) {
+      printOffsets = true;
+    }
+  }
+  if (printOffsets) {
+
+    printDynamicIndexList(_odsPrinter, *this, getOffsets(), getConstOffsetsAttr());
+  }
+  if (((!getShape().empty()) || (getConstShapeAttr()))) {
+    _odsPrinter << ' ' << "shape";
+    _odsPrinter << ' ' << ":";
+    _odsPrinter << ' ';
+    printDynamicIndexList(_odsPrinter, *this, getShape(), getConstShapeAttr());
+    _odsPrinter << ' ' << "strides";
+    _odsPrinter << ' ' << ":";
+    _odsPrinter << ' ';
+    printDynamicIndexList(_odsPrinter, *this, getStrides(), getConstStridesAttr());
+  }
+  ::llvm::SmallVector<::llvm::StringRef, 2> elidedAttrs;
+  elidedAttrs.push_back("operandSegmentSizes");
+  elidedAttrs.push_back("const_offsets");
+  elidedAttrs.push_back("const_shape");
+  elidedAttrs.push_back("const_strides");
+  _odsPrinter.printOptionalAttrDict((*this)->getAttrs(), elidedAttrs);
+  _odsPrinter << ' ' << ":";
+  _odsPrinter << ' ';
+  {
+    auto type = getSource().getType();
+    if (auto validType = ::llvm::dyn_cast<::mlir::Type>(type))
+      _odsPrinter.printStrippedAttrOrType(validType);
+   else
+     _odsPrinter << type;
+  }
+  _odsPrinter << ' ' << "->";
+  _odsPrinter << ' ';
+  // _odsPrinter << getTensorDesc().getType();
+
+
+  _odsPrinter << "!xegpu.tensor_desc<";
+
+  auto tDesc = getTensorDesc().getType(); 
+  auto shape = tDesc.getShape();
+  for (int64_t dim : shape) {
+    if (mlir::ShapedType::isDynamic(dim))
+      _odsPrinter << '?';
+    else
+      _odsPrinter << dim;
+    _odsPrinter << 'x';
+  }
+
+  _odsPrinter << tDesc.getElementType();
+
+  if (auto encoding = tDesc.getEncoding())
+    _odsPrinter << ", " << encoding;
+
+  if (auto layout = tDesc.getLayout())
+    _odsPrinter << ", " << layout;
+
+  _odsPrinter << ">";
+
+}
+
 //===----------------------------------------------------------------------===//
 // XeGPU_PrefetchNdOp
 //===----------------------------------------------------------------------===//
@@ -635,6 +893,7 @@ LogicalResult ConvertLayoutOp::verify() {
   return mlir::success();
 }
 
+
 } // namespace xegpu
 } // namespace mlir
 
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index aff8f63adc05b..e8836b7cffbc7 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -17,8 +17,8 @@ gpu.func @create_nd_tdesc_1(%src: memref<24x32xf32>) {
 gpu.func @create_nd_tdesc_2(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
   //CHECK: %[[C:.*]] = arith.constant 1 : index
   %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]], %[[arg4]]], [%[[arg2]], %[[arg1]]], [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32>
-  %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]],  %[[arg4]]] shape : [%[[arg2]], %[[arg1]]] strides : [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  %1 = xegpu.create_nd_tdesc %src[%x, %y] shape:[%h, %w] strides: [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
   gpu.return
 }
 
@@ -54,6 +54,14 @@ gpu.func @create_nd_tdesc_6(%src: memref<24x32xf32>) {
   gpu.return
 }
 
+// CHECK: gpu.func @test_create_nd_tdesc_7(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
+gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
+  //CHECK: %[[C:.*]] = arith.constant 1 : index
+  %c1 = arith.constant 1 : index
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]], %[[arg4]]] shape : [%[[arg2]], %[[arg1]]] strides : [%[[arg1]], %[[C]]]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  %1 = xegpu.create_nd_tdesc %src offsets : [%x, %y] shape : [%h, %w] strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  gpu.return
+}
 
 // CHECK: gpu.func @prefetch_nd(%[[arg0:.*]]: memref<24x32xf16>) {
 gpu.func @prefetch_nd(%src: memref<24x32xf16>) {
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index 3d91b2269bc4b..ba29d1ab13cae 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -150,16 +150,16 @@ gpu.module @test {
 // CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: ui64, %[[ARG1:[0-9a-zA-Z]+]]: ui64, %[[ARG2:[0-9a-zA-Z]+]]: index,
 // CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: index, %[[ARG4:[0-9a-zA-Z]+]]: index,
 // CHECK-SAME: %[[ARG5:[0-9a-zA-Z]+]]: index, %[[ARG6:[0-9a-zA-Z]+]]: index, %[[ARG7:[0-9a-zA-Z]+]]: index) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][{{.*}}], [%[[ARG2]], %[[ARG3]]], [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
+// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][{{.*}}] shape : [%[[ARG2]], %[[ARG3]]] strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
 // CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]]  : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
-// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][{{.*}}], [%[[ARG2]], %[[ARG3]]], [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
+// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][{{.*}}] shape : [%[[ARG2]], %[[ARG3]]] strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
 // CHECK: xegpu.store_nd %[[T1]], %[[T2]]  : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
 gpu.module @test {
   gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %arg1: ui64, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index) {
     %c0 = arith.constant 0 : index
-    %0 = xegpu.create_nd_tdesc %arg0[%c0, %c0], [%arg2, %arg3], [%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    %0 = xegpu.create_nd_tdesc %arg0[%c0, %c0] shape:[%arg2, %arg3] strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     %1 = xegpu.load_nd %0  {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16>
-    %2 = xegpu.create_nd_tdesc %arg1[%c0, %c0], [%arg2, %arg3], [%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    %2 = xegpu.create_nd_tdesc %arg1[%c0, %c0] shape:[%arg2, %arg3] strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     xegpu.store_nd %1, %2 : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     gpu.return
   }

>From 2465050985b4ad0b1073a8cb36b0a462d542d3ae Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 2 Jul 2025 02:08:16 +0000
Subject: [PATCH 02/23] add tests

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td |  2 +-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 98 ++++++++-----------
 mlir/test/Dialect/XeGPU/ops.mlir              | 14 ++-
 3 files changed, 52 insertions(+), 62 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 018c187f642d6..2cbae19ff2c05 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -182,8 +182,8 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 
       // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present. So it is set to be MAX to indicate user not passed any value (kDynamic means offsets passed as variable).
       setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), std::numeric_limits<int64_t>::max()));
-      // setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), 0));
       //setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), mlir::ShapedType::kDynamic));
+
       attr = getConstOffsetsAttr();
       return attr;
     }
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 13ef77bb4f970..cab4ca8a73898 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -136,8 +136,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
          shape.size() == strides.size() && shape.size() == offsets.size());
 
   Type srcTy = source.getType();
-  assert(isa<IntegerType>(srcTy) ||
-         isa<MemRefType>(srcTy) && "Source has to be either int or memref.");
+  assert((isa<IntegerType>(srcTy) ||
+         isa<MemRefType>(srcTy)) && "Source has to be either int or memref.");
 
   llvm::SmallVector<Value> dynamicOffsets;
   llvm::SmallVector<Value> dynamicShape;
@@ -222,27 +222,30 @@ LogicalResult CreateNdDescOp::verify() {
 }
 
 
+//ParseResult parseOptionalDynamicIndexList(
+//    OpAsmParser &parser,
+//    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
+//    DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
+//    SmallVectorImpl<Type> *valueTypes = nullptr,
+//    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square);
+//inline ParseResult parseOptionalDynamicIndexList(
+//    OpAsmParser &parser,
+//    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
+//    DenseI64ArrayAttr &integers, SmallVectorImpl<Type> *valueTypes = nullptr,
+//    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
+//  DenseBoolArrayAttr scalableFlags;
+//  return parseOptionalDynamicIndexList(parser, values, integers, scalableFlags,
+//                               valueTypes, delimiter);
+//}
+
+
+
 ParseResult parseOptionalDynamicIndexList(
     OpAsmParser &parser,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
     DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
     SmallVectorImpl<Type> *valueTypes = nullptr,
-    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square);
-inline ParseResult parseOptionalDynamicIndexList(
-    OpAsmParser &parser,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-    DenseI64ArrayAttr &integers, SmallVectorImpl<Type> *valueTypes = nullptr,
     AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
-  DenseBoolArrayAttr scalableFlags;
-  return parseOptionalDynamicIndexList(parser, values, integers, scalableFlags,
-                               valueTypes, delimiter);
-}
-
-ParseResult parseOptionalDynamicIndexList(
-    OpAsmParser &parser,
-    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-    DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
-    SmallVectorImpl<Type> *valueTypes, AsmParser::Delimiter delimiter) {
 
   SmallVector<int64_t, 4> integerVals;
   SmallVector<bool, 4> scalableVals;
@@ -286,18 +289,15 @@ ParseResult parseOptionalDynamicIndexList(
 ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::OperationState &result) {
   ::mlir::OpAsmParser::UnresolvedOperand sourceRawOperand{};
   ::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> sourceOperands(&sourceRawOperand, 1);  ::llvm::SMLoc sourceOperandsLoc;
-  (void)sourceOperandsLoc;
+
   ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> offsetsOperands;
   ::llvm::SMLoc offsetsOperandsLoc;
-  (void)offsetsOperandsLoc;
   ::mlir::DenseI64ArrayAttr const_offsetsAttr;
   ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> shapeOperands;
   ::llvm::SMLoc shapeOperandsLoc;
-  (void)shapeOperandsLoc;
   ::mlir::DenseI64ArrayAttr const_shapeAttr;
   ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> stridesOperands;
   ::llvm::SMLoc stridesOperandsLoc;
-  (void)stridesOperandsLoc;
   ::mlir::DenseI64ArrayAttr const_stridesAttr;
   ::mlir::Type sourceRawType{};
   ::llvm::ArrayRef<::mlir::Type> sourceTypes(&sourceRawType, 1);
@@ -308,45 +308,32 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::O
   if (parser.parseOperand(sourceRawOperand))
     return ::mlir::failure();
 
-      auto optionalOffsetResult = [&]() -> ::mlir::OptionalParseResult {
-      {
-        // skip the "offsets :" at the begining if it exists 
-        if (::mlir::succeeded(parser.parseOptionalKeyword("offsets"))) {
-          if (parser.parseColon())
-         return ::mlir::failure();
-        }
-        offsetsOperandsLoc = parser.getCurrentLocation();
-        auto odsResult = parseOptionalDynamicIndexList(parser, offsetsOperands, const_offsetsAttr);
-        // Debug print for offsets parsing using LLVM_DEBUG
-        LLVM_DEBUG(llvm::dbgs() << "parseOptionalDynamicIndexList returned: " << (odsResult ? "failure" : "success") << "\n");
-        LLVM_DEBUG(llvm::dbgs() << "offsetsOperands size: " << offsetsOperands.size() << "\n");
-        if (const_offsetsAttr)
-          LLVM_DEBUG(llvm::dbgs() << "const_offsetsAttr: " << const_offsetsAttr << "\n");
-        if (const_offsetsAttr)
-          result.getOrAddProperties<CreateNdDescOp::Properties>().const_offsets = const_offsetsAttr;
-      }
-        return ::mlir::success();
-      }();
 
-      if (optionalOffsetResult.has_value() && ::mlir::failed(*optionalOffsetResult)) {
-        LLVM_DEBUG(llvm::dbgs() << "optionalOffsetResult failed\n");
-        return ::mlir::failure();
-      } 
+    // skip the "offsets :" at the begining if it exists 
+    //if (::mlir::succeeded(parser.parseOptionalKeyword("offsets"))) {
+    //  if (parser.parseColon())
+    // return ::mlir::failure();
+    //}
+    offsetsOperandsLoc = parser.getCurrentLocation();
+
+    DenseBoolArrayAttr scalableFlags;
+    auto odsResult = parseOptionalDynamicIndexList(parser, offsetsOperands, const_offsetsAttr, scalableFlags);
+
+    if (const_offsetsAttr) {
+      if (odsResult) return ::mlir::failure();
+      result.getOrAddProperties<CreateNdDescOp::Properties>().const_offsets = const_offsetsAttr;
+    } 
 
   if (::mlir::succeeded(parser.parseOptionalKeyword("shape"))) {
-    LLVM_DEBUG(llvm::dbgs() << "Parsing 'shape' keyword\n");
     if (parser.parseColon())
       return ::mlir::failure();
     {
       shapeOperandsLoc = parser.getCurrentLocation();
       auto odsResult = parseDynamicIndexList(parser, shapeOperands, const_shapeAttr);
-      LLVM_DEBUG(llvm::dbgs() << "parseDynamicIndexList for shape returned: " << (odsResult ? "failure" : "success") << "\n");
-      LLVM_DEBUG(llvm::dbgs() << "shapeOperands size: " << shapeOperands.size() << "\n");
-      if (const_shapeAttr)
-        LLVM_DEBUG(llvm::dbgs() << "const_shapeAttr: " << const_shapeAttr << "\n");
-      if (odsResult) return ::mlir::failure();
-      if (const_shapeAttr)
+      if (const_shapeAttr) {
+        if (odsResult) return ::mlir::failure();
         result.getOrAddProperties<CreateNdDescOp::Properties>().const_shape = const_shapeAttr;
+      }
     }
 
     if (parser.parseKeyword("strides"))
@@ -356,13 +343,10 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::O
     {
       stridesOperandsLoc = parser.getCurrentLocation();
       auto odsResult = parseDynamicIndexList(parser, stridesOperands, const_stridesAttr);
-      LLVM_DEBUG(llvm::dbgs() << "parseDynamicIndexList for strides returned: " << (odsResult ? "failure" : "success") << "\n");
-      LLVM_DEBUG(llvm::dbgs() << "stridesOperands size: " << stridesOperands.size() << "\n");
-      if (const_stridesAttr)
-        LLVM_DEBUG(llvm::dbgs() << "const_stridesAttr: " << const_stridesAttr << "\n");
-      if (odsResult) return ::mlir::failure();
-      if (const_stridesAttr)
+      if (const_stridesAttr) {
+        if (odsResult) return ::mlir::failure();
         result.getOrAddProperties<CreateNdDescOp::Properties>().const_strides = const_stridesAttr;
+      }
     }
   }
   {
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index e8836b7cffbc7..d5a01e4c66b5e 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -54,12 +54,18 @@ gpu.func @create_nd_tdesc_6(%src: memref<24x32xf32>) {
   gpu.return
 }
 
-// CHECK: gpu.func @test_create_nd_tdesc_7(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
-gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
+
+// CHECK: gpu.func @test_create_nd_tdesc_7(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index, %[[arg5:.*]]: memref<24x32xf32>) 
+gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index, %y : index, %src2: memref<24x32xf32>) {
   //CHECK: %[[C:.*]] = arith.constant 1 : index
   %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]], %[[arg4]]] shape : [%[[arg2]], %[[arg1]]] strides : [%[[arg1]], %[[C]]]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
-  %1 = xegpu.create_nd_tdesc %src offsets : [%x, %y] shape : [%h, %w] strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg5]] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+  %3 = xegpu.create_nd_tdesc %src2 : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+ 
+   // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] shape : [%[[arg2]], %[[arg1]]] strides : [%[[arg1]], %[[C]]]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  %2 = xegpu.create_nd_tdesc %src[0, 0] shape : [%h, %w] strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+
   gpu.return
 }
 

>From 107787193ebe82524ec5231d3c013d08d1532040 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 2 Jul 2025 02:13:20 +0000
Subject: [PATCH 03/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 136 +++++++++++++------------
 1 file changed, 70 insertions(+), 66 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index cab4ca8a73898..e6590c2ed53fa 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -122,9 +122,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
   build(builder, state, tdesc, source, dynamicOffsets /* dynamic offsets */,
         ValueRange({}) /* empty dynamic shape */,
         ValueRange({}) /* empty dynamic strides */,
-        builder.getDenseI64ArrayAttr(staticOffsets)  /* const offsets */, 
-        {} /* empty const shape*/,
-        {} /* empty const strides*/);
+        builder.getDenseI64ArrayAttr(staticOffsets) /* const offsets */,
+        {} /* empty const shape*/, {} /* empty const strides*/);
 }
 
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
@@ -136,8 +135,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
          shape.size() == strides.size() && shape.size() == offsets.size());
 
   Type srcTy = source.getType();
-  assert((isa<IntegerType>(srcTy) ||
-         isa<MemRefType>(srcTy)) && "Source has to be either int or memref.");
+  assert((isa<IntegerType>(srcTy) || isa<MemRefType>(srcTy)) &&
+         "Source has to be either int or memref.");
 
   llvm::SmallVector<Value> dynamicOffsets;
   llvm::SmallVector<Value> dynamicShape;
@@ -222,24 +221,6 @@ LogicalResult CreateNdDescOp::verify() {
 }
 
 
-//ParseResult parseOptionalDynamicIndexList(
-//    OpAsmParser &parser,
-//    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-//    DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
-//    SmallVectorImpl<Type> *valueTypes = nullptr,
-//    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square);
-//inline ParseResult parseOptionalDynamicIndexList(
-//    OpAsmParser &parser,
-//    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-//    DenseI64ArrayAttr &integers, SmallVectorImpl<Type> *valueTypes = nullptr,
-//    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
-//  DenseBoolArrayAttr scalableFlags;
-//  return parseOptionalDynamicIndexList(parser, values, integers, scalableFlags,
-//                               valueTypes, delimiter);
-//}
-
-
-
 ParseResult parseOptionalDynamicIndexList(
     OpAsmParser &parser,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
@@ -275,9 +256,10 @@ ParseResult parseOptionalDynamicIndexList(
     return success();
   };
   if (parser.parseOptionalLSquare().succeeded()) {
-    if ( parser.parseCommaSeparatedList(parseIntegerOrValue) || parser.parseRSquare() )
+    if (parser.parseCommaSeparatedList(parseIntegerOrValue) ||
+        parser.parseRSquare())
       return parser.emitError(parser.getNameLoc())
-            << "expected SSA value or integer";
+             << "expected SSA value or integer";
     integers = parser.getBuilder().getDenseI64ArrayAttr(integerVals);
     scalableFlags = parser.getBuilder().getDenseBoolArrayAttr(scalableVals);
     return success();
@@ -285,18 +267,22 @@ ParseResult parseOptionalDynamicIndexList(
   return success();
 }
 
-
-::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::OperationState &result) {
+::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser,
+                                          ::mlir::OperationState &result) {
   ::mlir::OpAsmParser::UnresolvedOperand sourceRawOperand{};
-  ::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> sourceOperands(&sourceRawOperand, 1);  ::llvm::SMLoc sourceOperandsLoc;
+  ::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> sourceOperands(
+      &sourceRawOperand, 1);
+  ::llvm::SMLoc sourceOperandsLoc;
 
-  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> offsetsOperands;
+  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4>
+      offsetsOperands;
   ::llvm::SMLoc offsetsOperandsLoc;
   ::mlir::DenseI64ArrayAttr const_offsetsAttr;
   ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> shapeOperands;
   ::llvm::SMLoc shapeOperandsLoc;
   ::mlir::DenseI64ArrayAttr const_shapeAttr;
-  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> stridesOperands;
+  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4>
+      stridesOperands;
   ::llvm::SMLoc stridesOperandsLoc;
   ::mlir::DenseI64ArrayAttr const_stridesAttr;
   ::mlir::Type sourceRawType{};
@@ -308,31 +294,36 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::O
   if (parser.parseOperand(sourceRawOperand))
     return ::mlir::failure();
 
+  // skip the "offsets :" at the begining if it exists
+  // if (::mlir::succeeded(parser.parseOptionalKeyword("offsets"))) {
+  //  if (parser.parseColon())
+  // return ::mlir::failure();
+  //}
+  offsetsOperandsLoc = parser.getCurrentLocation();
 
-    // skip the "offsets :" at the begining if it exists 
-    //if (::mlir::succeeded(parser.parseOptionalKeyword("offsets"))) {
-    //  if (parser.parseColon())
-    // return ::mlir::failure();
-    //}
-    offsetsOperandsLoc = parser.getCurrentLocation();
-
-    DenseBoolArrayAttr scalableFlags;
-    auto odsResult = parseOptionalDynamicIndexList(parser, offsetsOperands, const_offsetsAttr, scalableFlags);
+  DenseBoolArrayAttr scalableFlags;
+  auto odsResult = parseOptionalDynamicIndexList(
+      parser, offsetsOperands, const_offsetsAttr, scalableFlags);
 
-    if (const_offsetsAttr) {
-      if (odsResult) return ::mlir::failure();
-      result.getOrAddProperties<CreateNdDescOp::Properties>().const_offsets = const_offsetsAttr;
-    } 
+  if (const_offsetsAttr) {
+    if (odsResult)
+      return ::mlir::failure();
+    result.getOrAddProperties<CreateNdDescOp::Properties>().const_offsets =
+        const_offsetsAttr;
+  }
 
   if (::mlir::succeeded(parser.parseOptionalKeyword("shape"))) {
     if (parser.parseColon())
       return ::mlir::failure();
     {
       shapeOperandsLoc = parser.getCurrentLocation();
-      auto odsResult = parseDynamicIndexList(parser, shapeOperands, const_shapeAttr);
+      auto odsResult =
+          parseDynamicIndexList(parser, shapeOperands, const_shapeAttr);
       if (const_shapeAttr) {
-        if (odsResult) return ::mlir::failure();
-        result.getOrAddProperties<CreateNdDescOp::Properties>().const_shape = const_shapeAttr;
+        if (odsResult)
+          return ::mlir::failure();
+        result.getOrAddProperties<CreateNdDescOp::Properties>().const_shape =
+            const_shapeAttr;
       }
     }
 
@@ -342,20 +333,24 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::O
       return ::mlir::failure();
     {
       stridesOperandsLoc = parser.getCurrentLocation();
-      auto odsResult = parseDynamicIndexList(parser, stridesOperands, const_stridesAttr);
+      auto odsResult =
+          parseDynamicIndexList(parser, stridesOperands, const_stridesAttr);
       if (const_stridesAttr) {
-        if (odsResult) return ::mlir::failure();
-        result.getOrAddProperties<CreateNdDescOp::Properties>().const_strides = const_stridesAttr;
+        if (odsResult)
+          return ::mlir::failure();
+        result.getOrAddProperties<CreateNdDescOp::Properties>().const_strides =
+            const_stridesAttr;
       }
     }
   }
   {
-    auto loc = parser.getCurrentLocation();(void)loc;
+    auto loc = parser.getCurrentLocation();
     if (parser.parseOptionalAttrDict(result.attributes))
       return ::mlir::failure();
     if (failed(verifyInherentAttrs(result.name, result.attributes, [&]() {
-        return parser.emitError(loc) << "'" << result.name.getStringRef() << "' op ";
-      })))
+          return parser.emitError(loc)
+                 << "'" << result.name.getStringRef() << "' op ";
+        })))
       return ::mlir::failure();
   }
   if (parser.parseColon())
@@ -373,21 +368,30 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::O
   if (parser.parseType(TensorDescRawType))
     return ::mlir::failure();
 
-  ::llvm::copy(::llvm::ArrayRef<int32_t>({1, static_cast<int32_t>(offsetsOperands.size()), static_cast<int32_t>(shapeOperands.size()), static_cast<int32_t>(stridesOperands.size())}), result.getOrAddProperties<CreateNdDescOp::Properties>().operandSegmentSizes.begin());
+  ::llvm::copy(::llvm::ArrayRef<int32_t>(
+                   {1, static_cast<int32_t>(offsetsOperands.size()),
+                    static_cast<int32_t>(shapeOperands.size()),
+                    static_cast<int32_t>(stridesOperands.size())}),
+               result.getOrAddProperties<CreateNdDescOp::Properties>()
+                   .operandSegmentSizes.begin());
 
   ::mlir::Type odsBuildableType0 = parser.getBuilder().getIndexType();
   result.addTypes(TensorDescTypes);
 
-  if (parser.resolveOperands(sourceOperands, sourceTypes, sourceOperandsLoc, result.operands))
+  if (parser.resolveOperands(sourceOperands, sourceTypes, sourceOperandsLoc,
+                             result.operands))
     return ::mlir::failure();
 
-  if (parser.resolveOperands(offsetsOperands, odsBuildableType0, offsetsOperandsLoc, result.operands))
+  if (parser.resolveOperands(offsetsOperands, odsBuildableType0,
+                             offsetsOperandsLoc, result.operands))
     return ::mlir::failure();
 
-  if (parser.resolveOperands(shapeOperands, odsBuildableType0, shapeOperandsLoc, result.operands))
+  if (parser.resolveOperands(shapeOperands, odsBuildableType0, shapeOperandsLoc,
+                             result.operands))
     return ::mlir::failure();
 
-  if (parser.resolveOperands(stridesOperands, odsBuildableType0, stridesOperandsLoc, result.operands))
+  if (parser.resolveOperands(stridesOperands, odsBuildableType0,
+                             stridesOperandsLoc, result.operands))
     return ::mlir::failure();
   return ::mlir::success();
 }
@@ -395,7 +399,8 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser, ::mlir::O
 void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
   _odsPrinter << ' ';
   _odsPrinter << getSource();
-  // Print offsets if getConstOffsetsAttr() exists, is not empty, and its first value is not int64_t::max.
+  // Print offsets if getConstOffsetsAttr() exists, is not empty, and its first
+  // value is not int64_t::max.
   auto constOffsetsAttr = getConstOffsetsAttr();
   bool printOffsets = false;
   if (constOffsetsAttr && constOffsetsAttr.size() > 0) {
@@ -406,7 +411,8 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
   }
   if (printOffsets) {
 
-    printDynamicIndexList(_odsPrinter, *this, getOffsets(), getConstOffsetsAttr());
+    printDynamicIndexList(_odsPrinter, *this, getOffsets(),
+                          getConstOffsetsAttr());
   }
   if (((!getShape().empty()) || (getConstShapeAttr()))) {
     _odsPrinter << ' ' << "shape";
@@ -416,7 +422,8 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
     _odsPrinter << ' ' << "strides";
     _odsPrinter << ' ' << ":";
     _odsPrinter << ' ';
-    printDynamicIndexList(_odsPrinter, *this, getStrides(), getConstStridesAttr());
+    printDynamicIndexList(_odsPrinter, *this, getStrides(),
+                          getConstStridesAttr());
   }
   ::llvm::SmallVector<::llvm::StringRef, 2> elidedAttrs;
   elidedAttrs.push_back("operandSegmentSizes");
@@ -430,17 +437,16 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
     auto type = getSource().getType();
     if (auto validType = ::llvm::dyn_cast<::mlir::Type>(type))
       _odsPrinter.printStrippedAttrOrType(validType);
-   else
-     _odsPrinter << type;
+    else
+      _odsPrinter << type;
   }
   _odsPrinter << ' ' << "->";
   _odsPrinter << ' ';
   // _odsPrinter << getTensorDesc().getType();
 
-
   _odsPrinter << "!xegpu.tensor_desc<";
 
-  auto tDesc = getTensorDesc().getType(); 
+  auto tDesc = getTensorDesc().getType();
   auto shape = tDesc.getShape();
   for (int64_t dim : shape) {
     if (mlir::ShapedType::isDynamic(dim))
@@ -459,7 +465,6 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
     _odsPrinter << ", " << layout;
 
   _odsPrinter << ">";
-
 }
 
 //===----------------------------------------------------------------------===//
@@ -877,7 +882,6 @@ LogicalResult ConvertLayoutOp::verify() {
   return mlir::success();
 }
 
-
 } // namespace xegpu
 } // namespace mlir
 

>From 42baa22915a12f680b1aba6b43a6acf10e0009ad Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 2 Jul 2025 03:03:13 +0000
Subject: [PATCH 04/23] add more tests

---
 mlir/test/Dialect/XeGPU/ops.mlir | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index d5a01e4c66b5e..d746de69c4f8f 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -63,8 +63,35 @@ gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index,
   // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg5]] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   %3 = xegpu.create_nd_tdesc %src2 : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
  
-   // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] shape : [%[[arg2]], %[[arg1]]] strides : [%[[arg1]], %[[C]]]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_nd_tdesc_8(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) 
+gpu.func @test_create_nd_tdesc_8(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
+  
+  %c1 = arith.constant 1 : index   
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] shape : [%arg2, %arg1] strides : [%arg1, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
   %2 = xegpu.create_nd_tdesc %src[0, 0] shape : [%h, %w] strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+ 
+  gpu.return
+}
+
+// CHECK-LABEL: func @test_create_nd_tdesc_9({{.*}}) 
+
+gpu.func @test_create_nd_tdesc_9(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {
+
+  %c1 = arith.constant 1 : index
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[%arg3, %arg4] shape : [%arg2, %arg1] strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
+  %1 = xegpu.create_nd_tdesc %src[%x, %y] shape:[%h, %w] strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
+
+  gpu.return
+}
+
+// CHECK-LABEL: func @test_create_nd_tdesc_10({{.*}}) 
+gpu.func @test_create_nd_tdesc_10(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {  
+  %c1 = arith.constant 1 : index
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0 shape : [%arg2, %arg1] strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16> 
+  %2 = xegpu.create_nd_tdesc %src shape:[%h, %w] strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
 
   gpu.return
 }

>From 204d34781cc18dbd19a640afe024245afe0c9684 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 2 Jul 2025 03:04:09 +0000
Subject: [PATCH 05/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index e6590c2ed53fa..ba788f3454d25 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -220,7 +220,6 @@ LogicalResult CreateNdDescOp::verify() {
   return success();
 }
 
-
 ParseResult parseOptionalDynamicIndexList(
     OpAsmParser &parser,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,

>From 2793c8130b7379987f6ea451c4fc3dcd7e8a34b4 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 12 Jul 2025 05:07:52 +0000
Subject: [PATCH 06/23] add ui64 case support

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 2cbae19ff2c05..86c9d40575104 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -116,15 +116,6 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
   );
   let results = (outs XeGPU_TensorDesc: $TensorDesc);
 
-
-//  let assemblyFormat = [{
-//    $source
-//    (custom<DynamicIndexList>($offsets, $const_offsets)^)?
-//    (`base_shape` `:` custom<DynamicIndexList>($shape, $const_shape)^
-//     `base_strides` `:` custom<DynamicIndexList>($strides, $const_strides))?
-//    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
-//  }];
-
   let hasVerifier = 1;
 
   let hasCustomAssemblyFormat = 1;
@@ -177,12 +168,23 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 
     ArrayRef<int64_t> getStaticOffsets(){
       auto attr = getConstOffsetsAttr();
-      if (llvm::isa<IntegerType>(getSourceType()) || attr)
+
+      if (attr) 
         return attr;
 
-      // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present. So it is set to be MAX to indicate user not passed any value (kDynamic means offsets passed as variable).
-      setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), std::numeric_limits<int64_t>::max()));
-      //setConstOffsets(llvm::SmallVector<int64_t, 4>(getTensorDescShape().size(), mlir::ShapedType::kDynamic));
+      auto memrefType = llvm::dyn_cast<MemRefType>(getSourceType());
+      int rank = 0;
+      if (memrefType) {
+        //use source memref's rank, as source memref rank may be higher
+        rank = memrefType.getRank();
+      } else {
+        //nd_tdesc created from ui64, use nd_tdesc's rank
+        rank = getTensorDescShape().size();
+      };
+
+      // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
+      // It is set to be MAX to indicate user not passed any value, instead of kDynamic which means offsets passed as value.
+      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
 
       attr = getConstOffsetsAttr();
       return attr;

>From 6793689a36bf58b07fdbee24b92f1fe0fb56cff2 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Sat, 12 Jul 2025 05:49:58 +0000
Subject: [PATCH 07/23] remove unnecessary comments

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 62db7bd858d78..9f6090ad279f5 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -294,11 +294,6 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser,
   if (parser.parseOperand(sourceRawOperand))
     return ::mlir::failure();
 
-  // skip the "offsets :" at the begining if it exists
-  // if (::mlir::succeeded(parser.parseOptionalKeyword("offsets"))) {
-  //  if (parser.parseColon())
-  // return ::mlir::failure();
-  //}
   offsetsOperandsLoc = parser.getCurrentLocation();
 
   DenseBoolArrayAttr scalableFlags;
@@ -399,8 +394,7 @@ ::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser,
 void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
   _odsPrinter << ' ';
   _odsPrinter << getSource();
-  // Print offsets if getConstOffsetsAttr() exists, is not empty, and its first
-  // value is not int64_t::max.
+
   auto constOffsetsAttr = getConstOffsetsAttr();
   bool printOffsets = false;
   if (constOffsetsAttr && constOffsetsAttr.size() > 0) {

>From 4a96c71994ce09f785ef7eea33edc1209b714adc Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Mon, 14 Jul 2025 18:48:25 +0000
Subject: [PATCH 08/23] fix VectorToXeGPU tests

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td                  | 2 +-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp                          | 2 +-
 mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir           | 2 +-
 mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir          | 2 +-
 mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir  | 2 +-
 mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 710fc62b032a9..f55ee27278f66 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -184,7 +184,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 
       // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
       // It is set to be MAX to indicate user not passed any value, instead of kDynamic which means offsets passed as value.
-      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
+      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()>>16));
 
       attr = getConstOffsetsAttr();
       return attr;
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 9f6090ad279f5..fe68b65711ba6 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -399,7 +399,7 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
   bool printOffsets = false;
   if (constOffsetsAttr && constOffsetsAttr.size() > 0) {
     auto firstVal = constOffsetsAttr.asArrayRef()[0];
-    if (firstVal != std::numeric_limits<int64_t>::max()) {
+    if (firstVal != std::numeric_limits<int64_t>::max()>>16) {
       printOffsets = true;
     }
   }
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index 4af7061a4f8a3..0add1c2447596 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -54,7 +54,7 @@ func.func @load_dynamic_source(%source: memref<?x?x?xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:    [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32,
 // CHECK:       %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8x16xf32>
 // CHECK:       return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index d68a02b54e967..567a2a3a5c24d 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -56,7 +56,7 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:    [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32,
 // CHECK:       xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index c2f760b29afc4..72e08eab2c596 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -96,7 +96,7 @@ func.func @load_dynamic_source(%source: memref<?x?x?xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:    [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32
 // CHECK:       %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8x16xf32>
 // CHECK:       return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 8de6c2283b37c..a5fc34ae839bf 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -60,7 +60,7 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:    [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32
 // CHECK:       xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
 

>From 689a8a5303493039c1e940c7d68d26dd711b49d0 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Mon, 14 Jul 2025 18:52:52 +0000
Subject: [PATCH 09/23] tweak default offset value

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 2 +-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index f55ee27278f66..710fc62b032a9 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -184,7 +184,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 
       // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
       // It is set to be MAX to indicate user not passed any value, instead of kDynamic which means offsets passed as value.
-      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()>>16));
+      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
 
       attr = getConstOffsetsAttr();
       return attr;
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index fe68b65711ba6..6d5c29ac9a319 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -399,7 +399,7 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
   bool printOffsets = false;
   if (constOffsetsAttr && constOffsetsAttr.size() > 0) {
     auto firstVal = constOffsetsAttr.asArrayRef()[0];
-    if (firstVal != std::numeric_limits<int64_t>::max()>>16) {
+    if (firstVal != std::numeric_limits<int64_t>::max() ) {
       printOffsets = true;
     }
   }

>From 02d37952748c3cb567317f7caf5fe93ead9e10eb Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Mon, 14 Jul 2025 22:15:11 +0000
Subject: [PATCH 10/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 6d5c29ac9a319..9f6090ad279f5 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -399,7 +399,7 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
   bool printOffsets = false;
   if (constOffsetsAttr && constOffsetsAttr.size() > 0) {
     auto firstVal = constOffsetsAttr.asArrayRef()[0];
-    if (firstVal != std::numeric_limits<int64_t>::max() ) {
+    if (firstVal != std::numeric_limits<int64_t>::max()) {
       printOffsets = true;
     }
   }

>From 01718f4b4aa9f376986d610252144f38dd946b7d Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 01:46:05 +0000
Subject: [PATCH 11/23] add builders

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 61 ++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 9f6090ad279f5..2335d40e05cef 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -112,6 +112,67 @@ isValidGatherScatterParams(Type maskTy, VectorType valueTy,
 //===----------------------------------------------------------------------===//
 // XeGPU_CreateNdDescOp
 //===----------------------------------------------------------------------===//
+
+void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
+                           Type tdesc, TypedValue<MemRefType> source) {
+  [[maybe_unused]] auto ty = source.getType();
+  assert(ty.hasStaticShape());
+
+  build(builder, state, tdesc, source, ValueRange({}) /* dynamic offsets */,
+        ValueRange({}) /* empty dynamic shape */,
+        ValueRange({}) /* empty dynamic strides */,
+        builder.getDenseI64ArrayAttr({}) /* const offsets */,
+        builder.getDenseI64ArrayAttr({}) /* empty const shape*/,
+        builder.getDenseI64ArrayAttr({}) /* empty const strides*/);
+}
+
+
+void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
+                           Type tdesc, TypedValue<MemRefType> source,
+                           llvm::ArrayRef<OpFoldResult> shape,
+                           llvm::ArrayRef<OpFoldResult> strides) {
+  assert(shape.size() && strides.size() && shape.size() == strides.size());
+
+  llvm::SmallVector<int64_t> staticShape;
+  llvm::SmallVector<int64_t> staticStrides;
+  llvm::SmallVector<Value> dynamicShape;
+  llvm::SmallVector<Value> dynamicStrides;
+
+  dispatchIndexOpFoldResults(shape, dynamicShape, staticShape);
+  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
+
+  auto staticShapeAttr = builder.getDenseI64ArrayAttr(staticShape);
+  auto staticStridesAttr = builder.getDenseI64ArrayAttr(staticStrides);
+
+  build(builder, state, tdesc, source, ValueRange({}), dynamicShape,
+        dynamicStrides, builder.getDenseI64ArrayAttr({}), staticShapeAttr,
+        staticStridesAttr);
+}
+
+
+void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
+                           Type tdesc, TypedValue<IntegerType> source,
+                           llvm::ArrayRef<OpFoldResult> shape,
+                           llvm::ArrayRef<OpFoldResult> strides) {
+  assert(shape.size() && strides.size() && shape.size() == strides.size());
+
+  llvm::SmallVector<int64_t> staticShape;
+  llvm::SmallVector<int64_t> staticStrides;
+  llvm::SmallVector<Value> dynamicShape;
+  llvm::SmallVector<Value> dynamicStrides;
+
+  dispatchIndexOpFoldResults(shape, dynamicShape, staticShape);
+  dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
+
+  auto staticShapeAttr = builder.getDenseI64ArrayAttr(staticShape);
+  auto staticStridesAttr = builder.getDenseI64ArrayAttr(staticStrides);
+
+  build(builder, state, tdesc, source, ValueRange({}), dynamicShape,
+        dynamicStrides, builder.getDenseI64ArrayAttr({}), staticShapeAttr,
+        staticStridesAttr);
+}
+
+
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<MemRefType> source,
                            llvm::ArrayRef<OpFoldResult> offsets) {

>From 5ef6ca940fa2f3078be5959ece81a2c3f655751c Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 01:46:54 +0000
Subject: [PATCH 12/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2335d40e05cef..18827a55071e7 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -126,7 +126,6 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
         builder.getDenseI64ArrayAttr({}) /* empty const strides*/);
 }
 
-
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<MemRefType> source,
                            llvm::ArrayRef<OpFoldResult> shape,
@@ -149,7 +148,6 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
         staticStridesAttr);
 }
 
-
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<IntegerType> source,
                            llvm::ArrayRef<OpFoldResult> shape,
@@ -172,7 +170,6 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
         staticStridesAttr);
 }
 
-
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<MemRefType> source,
                            llvm::ArrayRef<OpFoldResult> offsets) {

>From 882313facacabd01667d459163c34802be427af4 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 19:42:21 +0000
Subject: [PATCH 13/23] simplify custom parser

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td |  18 +-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 220 ++----------------
 2 files changed, 31 insertions(+), 207 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 710fc62b032a9..1d62b1942e5f7 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -114,12 +114,19 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     OptionalAttr<DenseI64ArrayAttr>: $const_shape,
     OptionalAttr<DenseI64ArrayAttr>: $const_strides
   );
+
+  let assemblyFormat = [{
+    $source ``
+    custom<OptionalDynamicIndexList>($offsets, $const_offsets)
+    (`shape` `:` custom<DynamicIndexList>($shape, $const_shape)^
+     `strides``:` custom<DynamicIndexList>($strides, $const_strides))?
+    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
+  }];
+
   let results = (outs XeGPU_TensorDesc: $TensorDesc);
 
   let hasVerifier = 1;
 
-  let hasCustomAssemblyFormat = 1;
-
   let builders = [
     OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source)>,
 
@@ -174,14 +181,13 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 
       auto memrefType = llvm::dyn_cast<MemRefType>(getSourceType());
       int rank = 0;
-      if (memrefType) {
+      if (memrefType) 
         //use source memref's rank, as source memref rank may be higher
         rank = memrefType.getRank();
-      } else {
+      else
         //nd_tdesc created from ui64, use nd_tdesc's rank
         rank = getTensorDescShape().size();
-      };
-
+      
       // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
       // It is set to be MAX to indicate user not passed any value, instead of kDynamic which means offsets passed as value.
       setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 18827a55071e7..d9dce24c51c0b 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -282,19 +282,15 @@ LogicalResult CreateNdDescOp::verify() {
 ParseResult parseOptionalDynamicIndexList(
     OpAsmParser &parser,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-    DenseI64ArrayAttr &integers, DenseBoolArrayAttr &scalableFlags,
+    DenseI64ArrayAttr &integers,
     SmallVectorImpl<Type> *valueTypes = nullptr,
     AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
 
   SmallVector<int64_t, 4> integerVals;
-  SmallVector<bool, 4> scalableVals;
   auto parseIntegerOrValue = [&]() {
     OpAsmParser::UnresolvedOperand operand;
     auto res = parser.parseOptionalOperand(operand);
 
-    // When encountering `[`, assume that this is a scalable index.
-    scalableVals.push_back(parser.parseOptionalLSquare().succeeded());
-
     if (res.has_value() && succeeded(res.value())) {
       values.push_back(operand);
       integerVals.push_back(ShapedType::kDynamic);
@@ -307,10 +303,6 @@ ParseResult parseOptionalDynamicIndexList(
       integerVals.push_back(integer);
     }
 
-    // If this is assumed to be a scalable index, verify that there's a closing
-    // `]`.
-    if (scalableVals.back() && parser.parseOptionalRSquare().failed())
-      return failure();
     return success();
   };
   if (parser.parseOptionalLSquare().succeeded()) {
@@ -319,204 +311,30 @@ ParseResult parseOptionalDynamicIndexList(
       return parser.emitError(parser.getNameLoc())
              << "expected SSA value or integer";
     integers = parser.getBuilder().getDenseI64ArrayAttr(integerVals);
-    scalableFlags = parser.getBuilder().getDenseBoolArrayAttr(scalableVals);
     return success();
   }
   return success();
 }
 
-::mlir::ParseResult CreateNdDescOp::parse(::mlir::OpAsmParser &parser,
-                                          ::mlir::OperationState &result) {
-  ::mlir::OpAsmParser::UnresolvedOperand sourceRawOperand{};
-  ::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> sourceOperands(
-      &sourceRawOperand, 1);
-  ::llvm::SMLoc sourceOperandsLoc;
-
-  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4>
-      offsetsOperands;
-  ::llvm::SMLoc offsetsOperandsLoc;
-  ::mlir::DenseI64ArrayAttr const_offsetsAttr;
-  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4> shapeOperands;
-  ::llvm::SMLoc shapeOperandsLoc;
-  ::mlir::DenseI64ArrayAttr const_shapeAttr;
-  ::llvm::SmallVector<::mlir::OpAsmParser::UnresolvedOperand, 4>
-      stridesOperands;
-  ::llvm::SMLoc stridesOperandsLoc;
-  ::mlir::DenseI64ArrayAttr const_stridesAttr;
-  ::mlir::Type sourceRawType{};
-  ::llvm::ArrayRef<::mlir::Type> sourceTypes(&sourceRawType, 1);
-  ::mlir::Type TensorDescRawType{};
-  ::llvm::ArrayRef<::mlir::Type> TensorDescTypes(&TensorDescRawType, 1);
-
-  sourceOperandsLoc = parser.getCurrentLocation();
-  if (parser.parseOperand(sourceRawOperand))
-    return ::mlir::failure();
-
-  offsetsOperandsLoc = parser.getCurrentLocation();
-
-  DenseBoolArrayAttr scalableFlags;
-  auto odsResult = parseOptionalDynamicIndexList(
-      parser, offsetsOperands, const_offsetsAttr, scalableFlags);
-
-  if (const_offsetsAttr) {
-    if (odsResult)
-      return ::mlir::failure();
-    result.getOrAddProperties<CreateNdDescOp::Properties>().const_offsets =
-        const_offsetsAttr;
-  }
-
-  if (::mlir::succeeded(parser.parseOptionalKeyword("shape"))) {
-    if (parser.parseColon())
-      return ::mlir::failure();
-    {
-      shapeOperandsLoc = parser.getCurrentLocation();
-      auto odsResult =
-          parseDynamicIndexList(parser, shapeOperands, const_shapeAttr);
-      if (const_shapeAttr) {
-        if (odsResult)
-          return ::mlir::failure();
-        result.getOrAddProperties<CreateNdDescOp::Properties>().const_shape =
-            const_shapeAttr;
-      }
-    }
-
-    if (parser.parseKeyword("strides"))
-      return ::mlir::failure();
-    if (parser.parseColon())
-      return ::mlir::failure();
-    {
-      stridesOperandsLoc = parser.getCurrentLocation();
-      auto odsResult =
-          parseDynamicIndexList(parser, stridesOperands, const_stridesAttr);
-      if (const_stridesAttr) {
-        if (odsResult)
-          return ::mlir::failure();
-        result.getOrAddProperties<CreateNdDescOp::Properties>().const_strides =
-            const_stridesAttr;
-      }
-    }
-  }
-  {
-    auto loc = parser.getCurrentLocation();
-    if (parser.parseOptionalAttrDict(result.attributes))
-      return ::mlir::failure();
-    if (failed(verifyInherentAttrs(result.name, result.attributes, [&]() {
-          return parser.emitError(loc)
-                 << "'" << result.name.getStringRef() << "' op ";
-        })))
-      return ::mlir::failure();
-  }
-  if (parser.parseColon())
-    return ::mlir::failure();
-
-  {
-    ::mlir::Type type;
-    if (parser.parseCustomTypeWithFallback(type))
-      return ::mlir::failure();
-    sourceRawType = type;
-  }
-  if (parser.parseArrow())
-    return ::mlir::failure();
-
-  if (parser.parseType(TensorDescRawType))
-    return ::mlir::failure();
-
-  ::llvm::copy(::llvm::ArrayRef<int32_t>(
-                   {1, static_cast<int32_t>(offsetsOperands.size()),
-                    static_cast<int32_t>(shapeOperands.size()),
-                    static_cast<int32_t>(stridesOperands.size())}),
-               result.getOrAddProperties<CreateNdDescOp::Properties>()
-                   .operandSegmentSizes.begin());
-
-  ::mlir::Type odsBuildableType0 = parser.getBuilder().getIndexType();
-  result.addTypes(TensorDescTypes);
-
-  if (parser.resolveOperands(sourceOperands, sourceTypes, sourceOperandsLoc,
-                             result.operands))
-    return ::mlir::failure();
-
-  if (parser.resolveOperands(offsetsOperands, odsBuildableType0,
-                             offsetsOperandsLoc, result.operands))
-    return ::mlir::failure();
-
-  if (parser.resolveOperands(shapeOperands, odsBuildableType0, shapeOperandsLoc,
-                             result.operands))
-    return ::mlir::failure();
-
-  if (parser.resolveOperands(stridesOperands, odsBuildableType0,
-                             stridesOperandsLoc, result.operands))
-    return ::mlir::failure();
-  return ::mlir::success();
-}
-
-void CreateNdDescOp::print(::mlir::OpAsmPrinter &_odsPrinter) {
-  _odsPrinter << ' ';
-  _odsPrinter << getSource();
-
-  auto constOffsetsAttr = getConstOffsetsAttr();
-  bool printOffsets = false;
-  if (constOffsetsAttr && constOffsetsAttr.size() > 0) {
-    auto firstVal = constOffsetsAttr.asArrayRef()[0];
-    if (firstVal != std::numeric_limits<int64_t>::max()) {
-      printOffsets = true;
+void printOptionalDynamicIndexList(
+    OpAsmPrinter &printer, Operation *op, OperandRange values,
+    ArrayRef<int64_t> integers, TypeRange valueTypes = TypeRange()) {
+
+  if (values.empty() && llvm::all_of(integers, [](int64_t i) { return i == std::numeric_limits<int64_t>::max(); }))
+    return;
+  printer << '[';
+  unsigned dynamicValIdx = 0;
+  llvm::interleaveComma(integers, printer, [&](int64_t integer) {
+    if (ShapedType::isDynamic(integer)) {
+      printer << values[dynamicValIdx];
+      if (!valueTypes.empty())
+        printer << " : " << valueTypes[dynamicValIdx];
+      ++dynamicValIdx;
+    } else {
+      printer << integer;
     }
-  }
-  if (printOffsets) {
-
-    printDynamicIndexList(_odsPrinter, *this, getOffsets(),
-                          getConstOffsetsAttr());
-  }
-  if (((!getShape().empty()) || (getConstShapeAttr()))) {
-    _odsPrinter << ' ' << "shape";
-    _odsPrinter << ' ' << ":";
-    _odsPrinter << ' ';
-    printDynamicIndexList(_odsPrinter, *this, getShape(), getConstShapeAttr());
-    _odsPrinter << ' ' << "strides";
-    _odsPrinter << ' ' << ":";
-    _odsPrinter << ' ';
-    printDynamicIndexList(_odsPrinter, *this, getStrides(),
-                          getConstStridesAttr());
-  }
-  ::llvm::SmallVector<::llvm::StringRef, 2> elidedAttrs;
-  elidedAttrs.push_back("operandSegmentSizes");
-  elidedAttrs.push_back("const_offsets");
-  elidedAttrs.push_back("const_shape");
-  elidedAttrs.push_back("const_strides");
-  _odsPrinter.printOptionalAttrDict((*this)->getAttrs(), elidedAttrs);
-  _odsPrinter << ' ' << ":";
-  _odsPrinter << ' ';
-  {
-    auto type = getSource().getType();
-    if (auto validType = ::llvm::dyn_cast<::mlir::Type>(type))
-      _odsPrinter.printStrippedAttrOrType(validType);
-    else
-      _odsPrinter << type;
-  }
-  _odsPrinter << ' ' << "->";
-  _odsPrinter << ' ';
-  // _odsPrinter << getTensorDesc().getType();
-
-  _odsPrinter << "!xegpu.tensor_desc<";
-
-  auto tDesc = getTensorDesc().getType();
-  auto shape = tDesc.getShape();
-  for (int64_t dim : shape) {
-    if (mlir::ShapedType::isDynamic(dim))
-      _odsPrinter << '?';
-    else
-      _odsPrinter << dim;
-    _odsPrinter << 'x';
-  }
-
-  _odsPrinter << tDesc.getElementType();
-
-  if (auto encoding = tDesc.getEncoding())
-    _odsPrinter << ", " << encoding;
-
-  if (auto layout = tDesc.getLayout())
-    _odsPrinter << ", " << layout;
-
-  _odsPrinter << ">";
+  });
+  printer << ']';
 }
 
 //===----------------------------------------------------------------------===//

>From 456534a1a59e086b9c87f95ea7ef917eb5791bb7 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 20:25:48 +0000
Subject: [PATCH 14/23] add comma before shape and strides

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td   |  4 ++--
 .../Conversion/VectorToXeGPU/load-to-xegpu.mlir  |  2 +-
 .../Conversion/VectorToXeGPU/store-to-xegpu.mlir |  2 +-
 .../VectorToXeGPU/transfer-read-to-xegpu.mlir    |  2 +-
 .../VectorToXeGPU/transfer-write-to-xegpu.mlir   |  2 +-
 mlir/test/Dialect/XeGPU/ops.mlir                 | 16 ++++++++--------
 mlir/test/Dialect/XeGPU/subgroup-distribute.mlir |  8 ++++----
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 1d62b1942e5f7..988e5f10abe0a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -118,8 +118,8 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
   let assemblyFormat = [{
     $source ``
     custom<OptionalDynamicIndexList>($offsets, $const_offsets)
-    (`shape` `:` custom<DynamicIndexList>($shape, $const_shape)^
-     `strides``:` custom<DynamicIndexList>($strides, $const_strides))?
+    (`,` `shape` `:` custom<DynamicIndexList>($shape, $const_shape)^
+     `,` `strides``:` custom<DynamicIndexList>($strides, $const_strides))?
     attr-dict `:` type($source) `->` qualified(type($TensorDesc))
   }];
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index 0add1c2447596..58719e75b1bde 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -54,7 +54,7 @@ func.func @load_dynamic_source(%source: memref<?x?x?xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:  , shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32,
 // CHECK:       %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8x16xf32>
 // CHECK:       return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 567a2a3a5c24d..0d3da815529e3 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -56,7 +56,7 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:  , shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32,
 // CHECK:       xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 72e08eab2c596..05b41a8233e8c 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -96,7 +96,7 @@ func.func @load_dynamic_source(%source: memref<?x?x?xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:  , shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32
 // CHECK:       %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8x16xf32>
 // CHECK:       return %[[VEC]]
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index a5fc34ae839bf..2bfee03892d10 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -60,7 +60,7 @@ func.func @store_dynamic_source(%vec: vector<8x16xf32>,
 // CHECK-DAG:   %[[DIM_2:.+]] = memref.dim %[[SRC]], %[[C2]]
 // CHECK:       %[[DIM_0_STRIDE:.+]] = arith.muli %[[DIM_2]], %[[DIM_1]]
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
-// CHECK-SAME:   shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]] strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
+// CHECK-SAME:  , shape : [%[[DIM_0]], %[[DIM_1]], %[[DIM_2]]], strides : [%[[DIM_0_STRIDE]], %[[DIM_2]], 1]
 // CHECK-SAME:    memref<?x?x?xf32> -> !xegpu.tensor_desc<8x16xf32
 // CHECK:       xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8x16xf32>
 
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index 0d679e519ed60..868baf90c0ad4 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -17,8 +17,8 @@ gpu.func @create_nd_tdesc_1(%src: memref<24x32xf32>) {
 gpu.func @create_nd_tdesc_2(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
   //CHECK: %[[C:.*]] = arith.constant 1 : index
   %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]],  %[[arg4]]] shape : [%[[arg2]], %[[arg1]]] strides : [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32>
-  %1 = xegpu.create_nd_tdesc %src[%x, %y] shape:[%h, %w] strides: [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]],  %[[arg4]]], shape : [%[[arg2]], %[[arg1]]], strides : [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  %1 = xegpu.create_nd_tdesc %src[%x, %y], shape:[%h, %w], strides: [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
   gpu.return
 }
 
@@ -77,8 +77,8 @@ gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index,
 gpu.func @test_create_nd_tdesc_8(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
   
   %c1 = arith.constant 1 : index   
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0 shape : [%arg2, %arg1] strides : [%arg1, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
-  %2 = xegpu.create_nd_tdesc %src shape : [%h, %w] strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0, shape : [%arg2, %arg1], strides : [%arg1, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  %2 = xegpu.create_nd_tdesc %src, shape : [%h, %w], strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
  
   gpu.return
 }
@@ -88,8 +88,8 @@ gpu.func @test_create_nd_tdesc_8(%src: ui64, %w : index, %h : index, %x : index,
 gpu.func @test_create_nd_tdesc_9(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {
 
   %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[%arg3, %arg4] shape : [%arg2, %arg1] strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
-  %1 = xegpu.create_nd_tdesc %src[%x, %y] shape:[%h, %w] strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[%arg3, %arg4], shape : [%arg2, %arg1], strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
+  %1 = xegpu.create_nd_tdesc %src[%x, %y], shape:[%h, %w], strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
 
   gpu.return
 }
@@ -97,8 +97,8 @@ gpu.func @test_create_nd_tdesc_9(%src: memref<?x?xf16>, %w : index, %h : index,
 // CHECK-LABEL: func @test_create_nd_tdesc_10({{.*}}) 
 gpu.func @test_create_nd_tdesc_10(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {  
   %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0 shape : [%arg2, %arg1] strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16> 
-  %2 = xegpu.create_nd_tdesc %src shape:[%h, %w] strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0, shape : [%arg2, %arg1], strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16> 
+  %2 = xegpu.create_nd_tdesc %src, shape:[%h, %w], strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
 
   gpu.return
 }
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index ba29d1ab13cae..0bfbc4a35c03b 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -150,16 +150,16 @@ gpu.module @test {
 // CHECK: (%[[ARG0:[0-9a-zA-Z]+]]: ui64, %[[ARG1:[0-9a-zA-Z]+]]: ui64, %[[ARG2:[0-9a-zA-Z]+]]: index,
 // CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: index, %[[ARG4:[0-9a-zA-Z]+]]: index,
 // CHECK-SAME: %[[ARG5:[0-9a-zA-Z]+]]: index, %[[ARG6:[0-9a-zA-Z]+]]: index, %[[ARG7:[0-9a-zA-Z]+]]: index) {
-// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][{{.*}}] shape : [%[[ARG2]], %[[ARG3]]] strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
+// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]][{{.*}}], shape : [%[[ARG2]], %[[ARG3]]], strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
 // CHECK: %[[T1:.*]] = xegpu.load_nd %[[T0]]  : !xegpu.tensor_desc<16x16xf16> -> vector<16xf16>
-// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][{{.*}}] shape : [%[[ARG2]], %[[ARG3]]] strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
+// CHECK: %[[T2:.*]] = xegpu.create_nd_tdesc %[[ARG1]][{{.*}}], shape : [%[[ARG2]], %[[ARG3]]], strides : [%[[ARG4]], %[[ARG5]]] : ui64 -> !xegpu.tensor_desc<16x16xf16>
 // CHECK: xegpu.store_nd %[[T1]], %[[T2]]  : vector<16xf16>, !xegpu.tensor_desc<16x16xf16>
 gpu.module @test {
   gpu.func @create_nd_tdesc_non_memref(%arg0: ui64, %arg1: ui64, %arg2: index, %arg3: index, %arg4: index, %arg5: index, %arg6: index, %arg7: index) {
     %c0 = arith.constant 0 : index
-    %0 = xegpu.create_nd_tdesc %arg0[%c0, %c0] shape:[%arg2, %arg3] strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    %0 = xegpu.create_nd_tdesc %arg0[%c0, %c0], shape:[%arg2, %arg3], strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     %1 = xegpu.load_nd %0  {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<16x16xf16>
-    %2 = xegpu.create_nd_tdesc %arg1[%c0, %c0] shape:[%arg2, %arg3] strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+    %2 = xegpu.create_nd_tdesc %arg1[%c0, %c0], shape:[%arg2, %arg3], strides:[%arg4, %arg5] : ui64 -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     xegpu.store_nd %1, %2 : vector<16x16xf16>, !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
     gpu.return
   }

>From b6f016e562ae0189744948d9c013bff598bac93f Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 20:58:28 +0000
Subject: [PATCH 15/23] tie the offsets rank to input tensor shape instead of
 tdesc

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 988e5f10abe0a..1864b8ba5d1d6 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -186,7 +186,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
         rank = memrefType.getRank();
       else
         //nd_tdesc created from ui64, use nd_tdesc's rank
-        rank = getTensorDescShape().size();
+        rank = getMixedSizes().size();
       
       // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
       // It is set to be MAX to indicate user not passed any value, instead of kDynamic which means offsets passed as value.

>From cd518d2c909ac7173aad24579ca50ddf6b066119 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 21:06:53 +0000
Subject: [PATCH 16/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index d9dce24c51c0b..e5da293d34e0c 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -282,8 +282,7 @@ LogicalResult CreateNdDescOp::verify() {
 ParseResult parseOptionalDynamicIndexList(
     OpAsmParser &parser,
     SmallVectorImpl<OpAsmParser::UnresolvedOperand> &values,
-    DenseI64ArrayAttr &integers,
-    SmallVectorImpl<Type> *valueTypes = nullptr,
+    DenseI64ArrayAttr &integers, SmallVectorImpl<Type> *valueTypes = nullptr,
     AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
 
   SmallVector<int64_t, 4> integerVals;
@@ -316,11 +315,14 @@ ParseResult parseOptionalDynamicIndexList(
   return success();
 }
 
-void printOptionalDynamicIndexList(
-    OpAsmPrinter &printer, Operation *op, OperandRange values,
-    ArrayRef<int64_t> integers, TypeRange valueTypes = TypeRange()) {
+void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,
+                                   OperandRange values,
+                                   ArrayRef<int64_t> integers,
+                                   TypeRange valueTypes = TypeRange()) {
 
-  if (values.empty() && llvm::all_of(integers, [](int64_t i) { return i == std::numeric_limits<int64_t>::max(); }))
+  if (values.empty() && llvm::all_of(integers, [](int64_t i) {
+        return i == std::numeric_limits<int64_t>::max();
+      }))
     return;
   printer << '[';
   unsigned dynamicValIdx = 0;

>From 546a3f773f06571bd11fef0d0a70884a6a2d6333 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Tue, 15 Jul 2025 23:59:04 +0000
Subject: [PATCH 17/23] add verifier for invalid cases

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 17 +++++++++--
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 22 ++++++++++----
 mlir/test/Dialect/XeGPU/invalid.mlir          | 29 ++++++++++++++-----
 3 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 1864b8ba5d1d6..73e5f4a176fbb 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -196,17 +196,22 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
       return attr;
     }
 
-
     /// wrapper for matching with OffsetSizeAndStrideOpInterface
     /// If source is IntegerType or `const_shape` is filled,
     /// it will return `const_shape`, such that mixes of `shape`
     /// and `const_shape` will be used to represent the shape of
     /// source operand. They overide static shape from source memref type.
     ArrayRef<int64_t> getStaticSizes() {
+      /// To be compatible with OffsetSizeAndStrideOpInterface, which expects a valid return value and performs checks
+      static  llvm::SmallVector<int64_t, 4> emptyShape;
+      
       auto attr = getConstShapeAttr();
-      if (llvm::isa<IntegerType>(getSourceType()) || attr)
+      if (attr)
         return attr;
 
+      if (llvm::isa<IntegerType>(getSourceType()))
+        return emptyShape;
+
       auto memrefType = llvm::dyn_cast<MemRefType>(getSourceType());
       assert(memrefType && "Incorrect use of getStaticSizes");
       return memrefType.getShape();
@@ -218,9 +223,15 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     /// and `const_strides` will be used to represent the strides of
     /// source operand. They overide static strides from source memref type.
     ArrayRef<int64_t> getStaticStrides() {
+      /// To be compatible with OffsetSizeAndStrideOpInterface, which expects a valid return value and performs checks
+      static llvm::SmallVector<int64_t, 4> emptyStrides;
+
       auto attr = getConstStridesAttr();
-      if (llvm::isa<IntegerType>(getSourceType()) || attr)
+      if (attr)
         return attr;
+      
+      if (llvm::isa<IntegerType>(getSourceType()))
+        return emptyStrides;
 
       auto memrefType = llvm::dyn_cast<MemRefType>(getSourceType());
       assert(memrefType && "Incorrect use of getStaticStrides");
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index e5da293d34e0c..37f403a46976b 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -116,7 +116,7 @@ isValidGatherScatterParams(Type maskTy, VectorType valueTy,
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<MemRefType> source) {
   [[maybe_unused]] auto ty = source.getType();
-  assert(ty.hasStaticShape());
+  assert(ty.hasStaticShape() && "expecting a memref with static shape");
 
   build(builder, state, tdesc, source, ValueRange({}) /* dynamic offsets */,
         ValueRange({}) /* empty dynamic shape */,
@@ -130,7 +130,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<MemRefType> source,
                            llvm::ArrayRef<OpFoldResult> shape,
                            llvm::ArrayRef<OpFoldResult> strides) {
-  assert(shape.size() && strides.size() && shape.size() == strides.size());
+  assert(shape.size() && strides.size() && shape.size() == strides.size() &&
+         "Shape and strides must be present and of equal size for ui64 initialization.");
 
   llvm::SmallVector<int64_t> staticShape;
   llvm::SmallVector<int64_t> staticStrides;
@@ -152,7 +153,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            Type tdesc, TypedValue<IntegerType> source,
                            llvm::ArrayRef<OpFoldResult> shape,
                            llvm::ArrayRef<OpFoldResult> strides) {
-  assert(shape.size() && strides.size() && shape.size() == strides.size());
+  assert(shape.size() && strides.size() && shape.size() == strides.size() &&
+         "Shape and strides must be present and of equal size for ui64 initialization.");
 
   llvm::SmallVector<int64_t> staticShape;
   llvm::SmallVector<int64_t> staticStrides;
@@ -255,6 +257,13 @@ LogicalResult CreateNdDescOp::verify() {
     invalidElemTy |= memrefTy.getElementType() != getElementType();
   }
 
+  if (llvm::isa<IntegerType>(getSourceType()) ) {
+    // strides and shape must present for integer source.
+    if (getMixedStrides().empty() || getMixedSizes().empty())
+      return emitOpError("Expecting strides and shape to be present for "
+                         "integer source.");
+  } 
+
   // mismatches among shape, strides, and offsets are
   // already handeled by OffsetSizeAndStrideOpInterface.
   // So they are not check here.
@@ -301,18 +310,21 @@ ParseResult parseOptionalDynamicIndexList(
         return failure();
       integerVals.push_back(integer);
     }
-
     return success();
   };
+
+  //If the optional values are given there must be left bracket
   if (parser.parseOptionalLSquare().succeeded()) {
     if (parser.parseCommaSeparatedList(parseIntegerOrValue) ||
         parser.parseRSquare())
       return parser.emitError(parser.getNameLoc())
-             << "expected SSA value or integer";
+             << "expected a list of SSA values or integers";
     integers = parser.getBuilder().getDenseI64ArrayAttr(integerVals);
     return success();
   }
+  
   return success();
+
 }
 
 void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 83a98ab0622b7..eb564d55bfd51 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -split-input-file -verify-diagnostics
 
 // -----
-func.func @create_nd_tdesc_vc_1(%src: memref<24xf32>) {
+func.func @create_nd_tdesc_1(%src: memref<24xf32>) {
   // expected-error@+1 {{Expecting the TensorDesc rank is not greater than the ranks of shape, strides, offsets or the memref source}}
   %1 = xegpu.create_nd_tdesc %src[0] : memref<24xf32> -> !xegpu.tensor_desc<8x16xf32>
   return
@@ -9,47 +9,62 @@ func.func @create_nd_tdesc_vc_1(%src: memref<24xf32>) {
 
 // -----
 
-func.func @create_nd_tdesc_vc_2(%src: memref<24x32xf32>) {
+func.func @create_nd_tdesc_2(%src: memref<24x32xf32>) {
   // expected-error@+1 {{TensorDesc should have the same element type with the source if it is a memref}}
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf16>
   return
 }
 
 // -----
-func.func @create_nd_tdesc_vc_3(%src: memref<2x24x32xf32, 3>) {
+func.func @create_nd_tdesc_3(%src: memref<2x24x32xf32, 3>) {
   // expected-error@+1 {{SLM is only supported for 1D block tensor}}
   %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<8x16xf32, #xegpu.block_tdesc_attr<memory_space = slm>>
   return
 }
 
 // -----
-func.func @create_nd_tdesc_vc_4(%src: memref<2x24x32xf32, 3>) {
+func.func @create_nd_tdesc_4(%src: memref<2x24x32xf32, 3>) {
   // expected-error@+1 {{Memory space mismatch}}
   %1 = xegpu.create_nd_tdesc %src[0, 0, 0] : memref<2x24x32xf32, 3> -> !xegpu.tensor_desc<16xf32>
   return
 }
 
 // -----
-func.func @create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+func.func @create_nd_tdesc_5(%src: memref<128x128xf32>) {
   // expected-error@+1 {{cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [24, 48]>}}
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [24, 48]>>
   return
 }
 
 // -----
-func.func @create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+func.func @create_nd_tdesc_6(%src: memref<128x128xf32>) {
   // expected-error@+1 {{cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [24, 48]>}}
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [24, 48]>>
   return
 }
 
 // -----
-func.func @create_nd_tdesc_subgroup_1(%src: memref<128x128xf32>) {
+func.func @create_nd_tdesc_7(%src: memref<128x128xf32>) {
   // expected-error@+1 {{cannot distribute [128, 128] using #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [64, 32]>}}
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<128x128xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [4, 2], sg_data = [32, 64], inst_data = [64, 32]>>
   return
 }
 
+// -----
+func.func @create_nd_tdesc_8(%src: ui64) {
+  // expected-error@+1 {{'xegpu.create_nd_tdesc' op Expecting strides and shape to be present for integer source}}
+  %1 = xegpu.create_nd_tdesc %src : ui64-> !xegpu.tensor_desc<128x128xf32>
+  return
+}
+
+// -----
+func.func @create_nd_tdesc_9(%src: ui64) {
+  // expected-error@+1 {{expected mixed offsets rank to match mixed sizes rank}}
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : ui64-> !xegpu.tensor_desc<128x128xf32>
+  return
+}
+
+
 // -----
 func.func @prefetch_nd_vc_1(%src: memref<24x32xf16>) {
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>

>From 784695516860120c94009e3be98029506c90620a Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 16 Jul 2025 00:02:55 +0000
Subject: [PATCH 18/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 37f403a46976b..2940a7d83e7d9 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -131,7 +131,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            llvm::ArrayRef<OpFoldResult> shape,
                            llvm::ArrayRef<OpFoldResult> strides) {
   assert(shape.size() && strides.size() && shape.size() == strides.size() &&
-         "Shape and strides must be present and of equal size for ui64 initialization.");
+         "Shape and strides must be present and of equal size for ui64 "
+         "initialization.");
 
   llvm::SmallVector<int64_t> staticShape;
   llvm::SmallVector<int64_t> staticStrides;
@@ -154,7 +155,8 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
                            llvm::ArrayRef<OpFoldResult> shape,
                            llvm::ArrayRef<OpFoldResult> strides) {
   assert(shape.size() && strides.size() && shape.size() == strides.size() &&
-         "Shape and strides must be present and of equal size for ui64 initialization.");
+         "Shape and strides must be present and of equal size for ui64 "
+         "initialization.");
 
   llvm::SmallVector<int64_t> staticShape;
   llvm::SmallVector<int64_t> staticStrides;
@@ -257,12 +259,12 @@ LogicalResult CreateNdDescOp::verify() {
     invalidElemTy |= memrefTy.getElementType() != getElementType();
   }
 
-  if (llvm::isa<IntegerType>(getSourceType()) ) {
+  if (llvm::isa<IntegerType>(getSourceType())) {
     // strides and shape must present for integer source.
     if (getMixedStrides().empty() || getMixedSizes().empty())
       return emitOpError("Expecting strides and shape to be present for "
                          "integer source.");
-  } 
+  }
 
   // mismatches among shape, strides, and offsets are
   // already handeled by OffsetSizeAndStrideOpInterface.
@@ -313,7 +315,7 @@ ParseResult parseOptionalDynamicIndexList(
     return success();
   };
 
-  //If the optional values are given there must be left bracket
+  // If the optional values are given there must be left bracket
   if (parser.parseOptionalLSquare().succeeded()) {
     if (parser.parseCommaSeparatedList(parseIntegerOrValue) ||
         parser.parseRSquare())
@@ -322,9 +324,8 @@ ParseResult parseOptionalDynamicIndexList(
     integers = parser.getBuilder().getDenseI64ArrayAttr(integerVals);
     return success();
   }
-  
-  return success();
 
+  return success();
 }
 
 void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,

>From ded955292b267c4d6e9c6388a79eb044f2e1a0d5 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Wed, 16 Jul 2025 00:21:03 +0000
Subject: [PATCH 19/23] add comments

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 4 ++--
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp         | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 73e5f4a176fbb..19cfd908864be 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -189,7 +189,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
         rank = getMixedSizes().size();
       
       // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
-      // It is set to be MAX to indicate user not passed any value, instead of kDynamic which means offsets passed as value.
+      // place-holder value MAX indicating user doesn't provide offsets, instead of kDynamic which means offsets passed as value.
       setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
 
       attr = getConstOffsetsAttr();
@@ -204,7 +204,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
     ArrayRef<int64_t> getStaticSizes() {
       /// To be compatible with OffsetSizeAndStrideOpInterface, which expects valid return value and perform checks
       static  llvm::SmallVector<int64_t, 4> emptyShape;
-      
+
       auto attr = getConstShapeAttr();
       if (attr)
         return attr;
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 2940a7d83e7d9..7e03950c8a1e2 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -334,6 +334,7 @@ void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,
                                    TypeRange valueTypes = TypeRange()) {
 
   if (values.empty() && llvm::all_of(integers, [](int64_t i) {
+        // place-holder value MAX indicating user doesn't provide offsets
         return i == std::numeric_limits<int64_t>::max();
       }))
     return;

>From 97b6e3920bde240e99006f11d5047fa0775ae4e0 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Thu, 17 Jul 2025 15:51:46 +0000
Subject: [PATCH 20/23] simplify custom print

---
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 13 ++-------
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 28 ++++++++-----------
 2 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 19cfd908864be..788f3e2164871 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -179,17 +179,10 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
       if (attr) 
         return attr;
 
-      auto memrefType = llvm::dyn_cast<MemRefType>(getSourceType());
-      int rank = 0;
-      if (memrefType) 
-        //use source memref's rank, as source memref rank may be higher
-        rank = memrefType.getRank();
-      else
-        //nd_tdesc created from ui64, use nd_tdesc's rank
-        rank = getMixedSizes().size();
+      int64_t rank = getMixedSizes().size();
       
-      // The offsets are allowed to be empty. The Traits verification of OffsetSizeAndStrideOpInterface interface assumes offsets being present.
-      // place-holder value MAX indicating user doesn't provide offsets, instead of kDynamic which means offsets passed as value.
+      // Set constant offset to MAX to indicate no offsets provided
+      // or else the printer can't differeiate this with valid const_offset value (say 0)
       setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
 
       attr = getConstOffsetsAttr();
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 7e03950c8a1e2..53b25c4710117 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -12,6 +12,7 @@
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
 
 #include "llvm/Support/Debug.h"
 
@@ -328,29 +329,22 @@ ParseResult parseOptionalDynamicIndexList(
   return success();
 }
 
+
 void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,
                                    OperandRange values,
                                    ArrayRef<int64_t> integers,
-                                   TypeRange valueTypes = TypeRange()) {
+                                   TypeRange valueTypes = TypeRange(),
+                                   AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square){
 
   if (values.empty() && llvm::all_of(integers, [](int64_t i) {
-        // place-holder value MAX indicating user doesn't provide offsets
+        // MAX indiates no user-provided offsets for CreateNdDescOp.
         return i == std::numeric_limits<int64_t>::max();
-      }))
-    return;
-  printer << '[';
-  unsigned dynamicValIdx = 0;
-  llvm::interleaveComma(integers, printer, [&](int64_t integer) {
-    if (ShapedType::isDynamic(integer)) {
-      printer << values[dynamicValIdx];
-      if (!valueTypes.empty())
-        printer << " : " << valueTypes[dynamicValIdx];
-      ++dynamicValIdx;
-    } else {
-      printer << integer;
-    }
-  });
-  printer << ']';
+      }))    
+      return;
+
+  return printDynamicIndexList(printer, op, values, integers,
+                               /*scalableFlags=*/{}, valueTypes, delimiter);
+
 }
 
 //===----------------------------------------------------------------------===//

>From ed1d48e8360c22851f6d2a5090db7c2837771311 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Thu, 17 Jul 2025 15:54:07 +0000
Subject: [PATCH 21/23] git-clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 53b25c4710117..b20460cf8aefe 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -329,22 +329,19 @@ ParseResult parseOptionalDynamicIndexList(
   return success();
 }
 
-
-void printOptionalDynamicIndexList(OpAsmPrinter &printer, Operation *op,
-                                   OperandRange values,
-                                   ArrayRef<int64_t> integers,
-                                   TypeRange valueTypes = TypeRange(),
-                                   AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square){
+void printOptionalDynamicIndexList(
+    OpAsmPrinter &printer, Operation *op, OperandRange values,
+    ArrayRef<int64_t> integers, TypeRange valueTypes = TypeRange(),
+    AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
 
   if (values.empty() && llvm::all_of(integers, [](int64_t i) {
         // MAX indiates no user-provided offsets for CreateNdDescOp.
         return i == std::numeric_limits<int64_t>::max();
-      }))    
-      return;
+      }))
+    return;
 
   return printDynamicIndexList(printer, op, values, integers,
                                /*scalableFlags=*/{}, valueTypes, delimiter);
-
 }
 
 //===----------------------------------------------------------------------===//

>From d3e935b57863e1681c2d877bfc0f6bd41c7d8712 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Thu, 17 Jul 2025 16:46:10 +0000
Subject: [PATCH 22/23] use simpler interface for DenseI64ArrayAttr

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index b20460cf8aefe..38cceb53f1313 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -122,9 +122,9 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
   build(builder, state, tdesc, source, ValueRange({}) /* dynamic offsets */,
         ValueRange({}) /* empty dynamic shape */,
         ValueRange({}) /* empty dynamic strides */,
-        builder.getDenseI64ArrayAttr({}) /* const offsets */,
-        builder.getDenseI64ArrayAttr({}) /* empty const shape*/,
-        builder.getDenseI64ArrayAttr({}) /* empty const strides*/);
+        DenseI64ArrayAttr({}) /* const offsets */,
+        DenseI64ArrayAttr({}) /* empty const shape*/,
+        DenseI64ArrayAttr({}) /* empty const strides*/);
 }
 
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,

>From 205fea7c4be453d1103cbe51db89d4460d9ba199 Mon Sep 17 00:00:00 2001
From: Jianhui Li <jian.hui.li at intel.com>
Date: Thu, 17 Jul 2025 19:06:35 +0000
Subject: [PATCH 23/23] address feedback

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 4 +---
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp         | 6 ------
 mlir/test/Dialect/XeGPU/ops.mlir               | 6 +++---
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 788f3e2164871..81e25f7537cb0 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -181,9 +181,7 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface
 
       int64_t rank = getMixedSizes().size();
       
-      // Set constant offset to MAX to indicate no offsets provided
-      // or else the printer can't differeiate this with valid const_offset value (say 0)
-      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, std::numeric_limits<int64_t>::max()));
+      setConstOffsets(llvm::SmallVector<int64_t, 4>(rank, 0));
 
       attr = getConstOffsetsAttr();
       return attr;
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 38cceb53f1313..78cbf884a1911 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -334,12 +334,6 @@ void printOptionalDynamicIndexList(
     ArrayRef<int64_t> integers, TypeRange valueTypes = TypeRange(),
     AsmParser::Delimiter delimiter = AsmParser::Delimiter::Square) {
 
-  if (values.empty() && llvm::all_of(integers, [](int64_t i) {
-        // MAX indiates no user-provided offsets for CreateNdDescOp.
-        return i == std::numeric_limits<int64_t>::max();
-      }))
-    return;
-
   return printDynamicIndexList(printer, op, values, integers,
                                /*scalableFlags=*/{}, valueTypes, delimiter);
 }
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index 868baf90c0ad4..695437354cd7c 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -67,7 +67,7 @@ gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index,
   //CHECK: %[[C:.*]] = arith.constant 1 : index
   %c1 = arith.constant 1 : index
   
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg5]] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg5]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   %3 = xegpu.create_nd_tdesc %src2 : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
  
   gpu.return
@@ -77,7 +77,7 @@ gpu.func @test_create_nd_tdesc_7(%src: ui64, %w : index, %h : index, %x : index,
 gpu.func @test_create_nd_tdesc_8(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
   
   %c1 = arith.constant 1 : index   
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0, shape : [%arg2, %arg1], strides : [%arg1, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0], shape : [%arg2, %arg1], strides : [%arg1, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
   %2 = xegpu.create_nd_tdesc %src, shape : [%h, %w], strides : [%w, %c1]  : ui64 -> !xegpu.tensor_desc<8x16xf32>
  
   gpu.return
@@ -97,7 +97,7 @@ gpu.func @test_create_nd_tdesc_9(%src: memref<?x?xf16>, %w : index, %h : index,
 // CHECK-LABEL: func @test_create_nd_tdesc_10({{.*}}) 
 gpu.func @test_create_nd_tdesc_10(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {  
   %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0, shape : [%arg2, %arg1], strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16> 
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0], shape : [%arg2, %arg1], strides : [%arg1, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16> 
   %2 = xegpu.create_nd_tdesc %src, shape:[%h, %w], strides:[%w, %c1]  : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16>
 
   gpu.return



More information about the Mlir-commits mailing list