[Mlir-commits] [mlir] [MLIR][XeGPU] Adding XeGPU 2d block operators (PR #84692)

Chao Chen llvmlistbot at llvm.org
Thu Mar 14 08:13:27 PDT 2024


https://github.com/chencha3 updated https://github.com/llvm/llvm-project/pull/84692

From ad27a81fd0fddbffb7e5b3529017f3c532b0db7d Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Sun, 10 Mar 2024 20:35:47 +0000
Subject: [PATCH 1/5] add XeGPU 2D block operators

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h    |   4 +
 .../mlir/Dialect/XeGPU/IR/XeGPUAttrs.td       |  61 ++++
 .../mlir/Dialect/XeGPU/IR/XeGPUDialect.td     |   4 +-
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 211 +++++++++++++
 .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td       | 104 ++++++-
 mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp    |  72 ++++-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 276 +++++++++++++++++-
 mlir/test/Dialect/XeGPU/XeGPUOps.mlir         |  62 ++++
 8 files changed, 787 insertions(+), 7 deletions(-)
 create mode 100644 mlir/test/Dialect/XeGPU/XeGPUOps.mlir

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 7aaa4ecc7ee77a..8dc3ff78d25ede 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -9,7 +9,11 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
 #define MLIR_DIALECT_XEGPU_IR_XEGPU_H
 
+#include <mlir/Bytecode/BytecodeOpInterface.h>
+#include <mlir/IR/BuiltinTypes.h>
 #include <mlir/IR/Dialect.h>
+#include <mlir/Interfaces/ShapedOpInterfaces.h>
+#include <mlir/Interfaces/SideEffectInterfaces.h>
 
 namespace mlir {
 namespace xegpu {
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index bb325c272e3324..cd38549f1ccf43 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -10,6 +10,7 @@
 #define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
 
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
+include "mlir/IR/EnumAttr.td"
 
 class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
                 string baseCppClass = "::mlir::Attribute">
@@ -17,4 +18,64 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
   let mnemonic = attrMnemonic;
 }
 
+def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
+  let parameters = (ins
+    OptionalParameter<"MemoryScopeAttr">: $memory_scope,
+    OptionalParameter<"IntegerAttr", "1">: $array_length,
+    OptionalParameter<"BoolAttr", "true">: $boundary_check
+  );
+
+  let builders = [
+    AttrBuilder<(ins
+      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
+      CArg<"int", "1">:$array_length,
+      CArg<"bool", "true">: $boundary_check
+    )>
+  ];
+
+  let assemblyFormat = "`<` struct(params) `>`";
+}
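
For illustration, the `struct(params)` assembly format above yields attributes of the following shape (a hand-written sketch; omitted parameters fall back to their defaults):

```mlir
// All three parameters are optional; leaving one out means the default
// (global memory scope, array_length = 1, boundary_check = true).
#xegpu.tdesc_attr<memory_scope = slm, array_length = 2 : i64, boundary_check = true>
```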
+
+//===----------------------------------------------------------------------===//
+// XeGPU Memory Scope Enums.
+//===----------------------------------------------------------------------===//
+def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
+def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
+def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", 
+      "The address space of the memory the tensor descritor is created for", 
+      [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+
+def XeGPU_MemoryScopeAttr: 
+  EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
+    let assemblyFormat = "$value";
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU Cache Enums.
+//===----------------------------------------------------------------------===//
+def XeGPU_CachePolicyCached:        I32EnumAttrCase<"CACHED", 0, "cached">;                    // valid for read and write
+def XeGPU_CachePolicyUncached:      I32EnumAttrCase<"UNCACHED", 1, "uncached">;                // valid for read and write
+def XeGPU_CachePolicyStreaming:     I32EnumAttrCase<"STREAMING", 2, "streaming">;              // valid for read only
+def XeGPU_CachePolicyInvalid:       I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">;  // valid for read only
+def XeGPU_CachePolicyWriteBack:     I32EnumAttrCase<"WRITE_BACK", 4, "write_back">;            // valid for write only
+def XeGPU_CachePolicyWriteThrough:  I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">;      // valid for write only
+
+def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", 
+  [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, 
+   XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid,
+   XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+
+def XeGPU_CacheHintAttr 
+  : EnumAttr<XeGPU_Dialect, XeGPU_CachePolicyEnums, "cache_hint"> {
+    let assemblyFormat = "`<` $value `>`";
+}
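
A sketch of how the cache hints compose on an op (using the `load_nd` op defined later in this patch; the SSA names are hypothetical):

```mlir
// streaming and read_invalidate are read-only hints; write_back and
// write_through are write-only, per the case comments above.
%v = xegpu.load_nd %td {l1_hint = #xegpu.cache_hint<cached>,
                        l2_hint = #xegpu.cache_hint<uncached>,
                        l3_hint = #xegpu.cache_hint<streaming>}
     : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32>
```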
+
+
+
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index 3851275ad30a0a..c2f09319c790e0 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -23,8 +23,8 @@ def XeGPU_Dialect : Dialect {
       the lower-level GPU compiler.
     }];
 
-    // let useDefaultTypePrinterParser = true;
-    // let useDefaultAttributePrinterParser = true;
+    let useDefaultTypePrinterParser = true;
+    let useDefaultAttributePrinterParser = true;
 }
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 5825ef9195b03f..9d37d77e03a0c5 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -12,6 +12,22 @@
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/AttrTypeBase.td"
+
+
+include "mlir/IR/OpBase.td"
+include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/AttrTypeBase.td"
+include "mlir/IR/BuiltinTypes.td"
+include "mlir/IR/BuiltinTypeInterfaces.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ViewLikeInterface.td"
+include "mlir/Interfaces/CastInterfaces.td"
+include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/CopyOpInterface.td"
+include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/ShapedOpInterfaces.td"
 
 
 // Base class for dialect operations. This operation inherits from the base
@@ -23,4 +39,199 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
           Op<XeGPU_Dialect, mnemonic, traits>;
 
 
+def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSegments]> {
+
+  let summary = "create nd tensor descriptor operation";
+  let description = [{
+    The "create_nd_tdesc" operation creates a TensorDescType which represents
+    a sub-view of a 2D memory region (it can be extended to support N-D memory
+    regions if needed in the future). Elements in the subview are contiguous in
+    each dimension. It encodes the following important information for supporting
+    Intel hardware features:
+
+    * source: an object representing (the starting address/pointer of) a 2D memory region. 
+        It can be either a 2D memref object, or simply a pointer represented by a uint64_t type.
+        For the latter case, the shape and layout information of the 2D memory region should 
+        be explicitly passed via the `dynamic_shape` and `dynamic_strides` parameters.
+    * offsets: two index values representing the offsets into the "source", one per dimension, 
+        at which the subview of the target memory will be created. They are encoded via two
+        variables, "dynamic_offsets" and "static_offsets", such that they can
+        accept various forms, such as operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4]).
+    * shape: the shape information of the memory region pointed to by the "source". It is 
+        typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. 
+        But if "source" is simply a pointer represented as a uint64_t type, or a memref 
+        type without shape information, e.g., memref<?x?xf16>, the shape information has 
+        to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape" 
+        only accepts operands (e.g., [%c4096, %c4096]), not attributes (e.g., [4096, 4096]).
+    * strides: the strides of the memory region pointed to by the "source". Similar to shape, 
+        they are typically encoded via the MemRefType of the source too. But if "source" is 
+        simply a pointer represented as a uint64_t type, or a memref type without shape 
+        information, e.g., memref<?x?xf16>, the strides information has to be explicitly 
+        passed via the "dynamic_strides" argument, and it currently only accepts operands too.
+
+    Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
+    %0 = memref.alloc() : memref<1024x1024xf32>
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %1 = xegpu.create_nd_tdesc %0[%c0, %c0]: memref<1024x1024xf32> -> TensorDesc<8x16xf32>
+
+    Example 2 (suppose the tensor shape inferred by the compiler is 8x16):
+    %0 = memref.alloc(%h, %w) : memref<?x?xf32>
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: memref<?x?xf32> -> TensorDesc<8x16xf32>
+
+    Example 3 (suppose the tensor shape inferred by the compiler is 8x16):
+    %0 = ... : ui64
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
+  }];
+
+  let arguments = (ins 
+    XeGPU_BaseAddrType: $source, 
+    Variadic<Index>: $dynamic_offsets, 
+    Variadic<Index>: $dynamic_shape, 
+    Variadic<Index>: $dynamic_strides,
+    DenseI64ArrayAttr: $static_offsets
+  );
+  let results = (outs XeGPU_TensorDesc: $TensorDesc);
+
+  let assemblyFormat = [{
+    $source ``
+    custom<DynamicIndexList>($dynamic_offsets, $static_offsets)
+    (`,` `[` $dynamic_shape^ `]` `,` `[` $dynamic_strides `]`)?
+    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
+  }];
+  let skipDefaultBuilders = 1;
+  let hasVerifier = 1;
+
+  let builders = [
+    OpBuilder<(ins "Type": $TensorDesc, "Value": $source, "ValueRange": $offsets, 
+                   "ValueRange": $shape, "ValueRange": $strides, 
+                   "llvm::ArrayRef<int64_t>": $static_offsets)>,
+
+    OpBuilder<(ins "Type": $tdesc, "Value": $source, 
+                   "llvm::ArrayRef<OpFoldResult>": $offsets)>,
+
+    OpBuilder<(ins "Type": $tdesc, "Value": $source, 
+                   "llvm::ArrayRef<OpFoldResult>": $offsets,
+                   "ValueRange": $shape, "ValueRange": $stride)>
+  ];
+
+  let extraClassDeclaration = [{
+    /// Returns the type of the source memref operand.
+    Type getSourceType() {
+      return getSource().getType();
+    }
+
+    /// Returns the type of the result TensorDesc.
+    xegpu::TensorDescType getType() {
+      return getTensorDesc().getType();
+    }
+
+    /// Returns the offsets info of the source. It consolidates
+    /// information from both the dynamic_offsets and static_offsets
+    /// parameters. The static_offsets parameter always has the expected
+    /// rank, where some dims may have the ShapedType::kDynamic value,
+    /// indicating that the corresponding value comes from dynamic_offsets.
+    llvm::SmallVector<OpFoldResult> getOffsets();
+
+    /// Returns the shape info of the source. It comes either from the
+    /// memref type, if the source is a memref with static shape
+    /// information, or from the dynamic_shape parameter. If both
+    /// exist, the dynamic_shape parameter is used and the shape
+    /// information from the memref type is ignored.
+    llvm::SmallVector<OpFoldResult> getShape();
+
+    /// Returns the strides info of the source. It comes either from the
+    /// memref type, if the source is a memref with static shape
+    /// information, or from the dynamic_strides parameter. If both
+    /// exist, the dynamic_strides parameter is used and the strides
+    /// information from the memref type is ignored.
+    llvm::SmallVector<OpFoldResult> getStrides();
+
+    /// Return the element type of the TensorDesc
+    Type getElementType() {
+      return getType().getElementType();
+    }
+
+    /// Return the shape of the TensorDesc
+    llvm::ArrayRef<int64_t> getTensorDescShape() {
+      return getType().getShape();
+    }
+  }];
+}
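
To make the offset consolidation concrete, a minimal sketch of mixed static and dynamic offsets (`%src` and `%y` are assumed to be defined elsewhere):

```mlir
// custom<DynamicIndexList> folds the constant 0 into static_offsets and
// keeps %y in dynamic_offsets; getOffsets() re-assembles the full [0, %y].
%td = xegpu.create_nd_tdesc %src[0, %y] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
```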
+
+def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
+  let summary = "prefetches a nD block to cache";
+  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+  // Format: xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint<cached>, 
+  //                                   l2_hint = #xegpu.cache_hint<cached>, 
+  //                                   l3_hint = #xegpu.cache_hint<cached>}
+  //         : !xegpu.tensor_desc<8x16xf16>
+  let assemblyFormat = "$TensorDesc attr-dict `:` qualified(type($TensorDesc))";
+}
+
+
+def XeGPU_LoadNDOp : XeGPU_Op<"load_nd"> {
+  let summary = "loads a n-D block from memory (represented by TensorDesc)" 
+                "to registers (represented by vector)";
+  let description = [{
+    LoadNDOp essentially mimics the hardware block read instruction to read 
+    a block of data from memory to registers. It takes a set of cache hints 
+    for each level of cache, L1, L2 and L3. If the hardware does not have a 
+    corresponding cache, the corresponding cache hint attribute will be masked.
+    If both transpose and vnni_axis are present, the transpose is assumed to 
+    be applied first, followed by the VNNI transform.
+  }];
+
+  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+                       OptionalAttr<I64Attr>: $vnni_axis,
+                       OptionalAttr<DenseI64ArrayAttr>: $transpose,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+  let results = (outs XeGPU_ValueType: $value);
+
+  let extraClassDeclaration = [{
+    VectorType getType() {
+      return llvm::dyn_cast<VectorType>(getValue().getType());
+    }
+
+    xegpu::TensorDescType getTensorDescType() {
+      return getTensorDesc().getType();
+    }
+  }];
+
+  // Format: xegpu.load_nd %1 {transpose = [1, 0], 
+  //                l1_hint = #xegpu.cache_hint<cached>, 
+  //                l2_hint = #xegpu.cache_hint<uncached>, 
+  //                l3_hint = #xegpu.cache_hint<streaming>}
+  //         : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
+  let assemblyFormat = "$TensorDesc attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
+  let hasVerifier = 1;
+}
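
A sketch of the transpose-then-VNNI shape math described above (hypothetical values; for f16 the VNNI factor is 2):

```mlir
// tensor_desc<8x16xf16> --transpose [1, 0]--> 16x8 --vnni_axis = 0--> 8x8x2
%v = xegpu.load_nd %td {transpose = array<i64: 1, 0>, vnni_axis = 0}
     : !xegpu.tensor_desc<8x16xf16> -> vector<8x8x2xf16>
```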
+
+def XeGPU_StoreNDOp : XeGPU_Op<"store_nd", []> {
+  let summary = "stores a n-D block register region back to memory, currently only supports 2D";
+  let arguments = (ins XeGPU_ValueType: $value,
+                       XeGPU_TensorDesc: $TensorDesc,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+  // Format: xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
+  //                                l2_hint = #xegpu.cache_hint<write_back>, 
+  //                                l3_hint = #xegpu.cache_hint<write_through>}
+  //                                : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
+  let assemblyFormat = "$value `,` $TensorDesc attr-dict `:` type($value) `,` qualified(type($TensorDesc))";
+  let hasVerifier = 1;
+}
+
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 1d75bb4e2906fe..36b04ea12bcad0 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -9,9 +9,9 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
 #define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
 
-include "mlir/IR/BuiltinTypes.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
+include "mlir/IR/BuiltinTypes.td"
 
 def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>;
 def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>;
@@ -30,4 +30,106 @@ class XeGPUTypeDef<string name, string typeMnemonic, list<Trait> traits = [],
   let mnemonic = typeMnemonic;
 }
 
+def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
+        [ShapedTypeInterface], "::mlir::TensorType"> {
+  let summary = "TensorDesc describing regions of interested data.";
+  let description = [{
+    TensorDesc is a type designed to describe regions of the interested data as well as some 
+    features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, 
+    it essentially only contains the meta data, and doesn't hold the data by itself. It is designed 
+    to mainly support 2d block load/store and DPAS (matrix multiplication instruction) on Intel GPU. 
+    It encodes the following information:
+
+    * shape:  the sizes/shape of the data block of interest, e.g., 8x16 means 8 rows
+              and each row contains 16 continious data element. The rows could be
+              either continuous or not, depends on whether the encoding attribute
+              is set or not.
+    * element_type: the data type of the data element, e.g., f16, f32.
+
+    Similar to the builtin tensor, it also provides an optional attribute encoding 
+    the following information via the TensorDescAttr object:
+    * memory_scope (xegpu::MemoryScope): [optional] where the data is located, 
+                global memory or shared memory. It defaults to Global.
+    * array_length (int): [optional] The number of continuous blocks with size as `shape`,
+               that will be loaded by block load at a time. It defaults to 1.
+    * boundary_check (bool): [optional] indicates whether the operation detects the boundary 
+                and pads with zero for out-of-boundary access. It defaults to true.
+    
+
+    Syntax:
+
+    ```
+    TensorDesc-type ::= `tensor_desc` `<` dim-list element-type (attr-list)? `>`
+    element-type ::= float-type | integer-type | index-type
+    dim-list ::= (static-dim-list `x`)?
+    static-dim-list ::= decimal-literal `x` decimal-literal
+    attr-list ::= (, memory_scope = value)? (, array_length = value)? (, boundary_check = value)?
+    ```
+
+    Examples:
+
+    ```mlir
+    // A block TensorDesc with 8x16 i32 elements
+    xegpu.tensor_desc<8x16xi32>
+
+    // A block TensorDesc with 8x16 f32 elements
+    xegpu.tensor_desc<8x16xf32>
+
+    // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space.
+    xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr<memory_scope = slm>>
+    ```
+  }];
+
+  let parameters = (ins ArrayRefParameter<"int64_t">: $shape,
+                        "mlir::Type": $elementType,
+                        OptionalParameter<"mlir::Attribute">: $encoding);
+
+  let extraClassDeclaration = [{
+    using TensorType::clone;
+    using mlir::ShapedType::Trait<TensorDescType>::getElementTypeBitWidth;
+    using mlir::ShapedType::Trait<TensorDescType>::getRank;
+    using mlir::ShapedType::Trait<TensorDescType>::getNumElements;
+    using mlir::ShapedType::Trait<TensorDescType>::isDynamicDim;
+    using mlir::ShapedType::Trait<TensorDescType>::hasStaticShape;
+    using mlir::ShapedType::Trait<TensorDescType>::getNumDynamicDims;
+    using mlir::ShapedType::Trait<TensorDescType>::getDimSize;
+    using mlir::ShapedType::Trait<TensorDescType>::getDynamicDimIndex;
+
+    TensorDescType clone(::mlir::Type elementType) {
+      return llvm::cast<TensorDescType>(cloneWith(getShape(), elementType));
+    }
+
+    TensorDescAttr getEncodingAsTensorDescAttr() const {
+      return llvm::dyn_cast_if_present<TensorDescAttr>(getEncoding());
+    }
+
+    xegpu::MemoryScope getMemoryScope() const {
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getMemoryScope())
+        return attr.getMemoryScope().getValue();
+      // return default value
+      return MemoryScope::Global;
+    }
+
+    int getArrayLength() {
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getArrayLength())
+        return attr.getArrayLength().getInt();
+      // return default value
+      return 1; 
+    }
+
+    bool getBoundaryCheck() {
+      auto attr = getEncodingAsTensorDescAttr();
+      if (attr && attr.getBoundaryCheck())
+        return attr.getBoundaryCheck().getValue();
+      // return default value
+      return true;
+    }
+  }];
+
+  let hasCustomAssemblyFormat = true;
+  
+}
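
Given the getters above, a descriptor without an explicit encoding behaves as if it carried the defaults (a hand-written illustration):

```mlir
// getMemoryScope() == Global, getArrayLength() == 1, getBoundaryCheck() == true
!xegpu.tensor_desc<8x16xf16>
```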
+
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 4f839ee773476b..bd72d5c17b6ea1 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -6,7 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <llvm/ADT/TypeSwitch.h>
 #include <mlir/Dialect/XeGPU/IR/XeGPU.h>
+#include <mlir/IR/Builders.h>
+#include <mlir/IR/DialectImplementation.h>
 
 namespace mlir {
 namespace xegpu {
@@ -26,8 +29,73 @@ void XeGPUDialect::initialize() {
       >();
 }
 
-// this file is for position occupation,
-// we will add functions in following PRs.
+
+//===----------------------------------------------------------------------===//
+// XeGPU_TensorDescAttr
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XeGPU_TensorDescType
+//===----------------------------------------------------------------------===//
+mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) {
+  llvm::SmallVector<int64_t> shape;
+  mlir::Type elementType;
+  mlir::FailureOr<mlir::Attribute> encoding;
+
+  // Parse literal '<'
+  if (parser.parseLess())
+    return {};
+
+  auto shapeLoc = parser.getCurrentLocation();
+  if (mlir::failed(parser.parseDimensionList(shape))) {
+    parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
+    return {};
+  }
+
+  auto elemTypeLoc = parser.getCurrentLocation();
+  if (mlir::failed(parser.parseType(elementType))) {
+    parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
+    return {};
+  }
+
+  // parse optional attributes
+  if (mlir::succeeded(parser.parseOptionalComma())) {
+    encoding = mlir::FieldParser<mlir::Attribute>::parse(parser);
+    if (mlir::failed(encoding)) {
+      parser.emitError(parser.getCurrentLocation(),
+          "Failed to parse the attribute field for TensorDescType.\n");
+      return {};
+    }
+  }
+
+  // Parse literal '>'
+  if (parser.parseGreater())
+    return {};
+
+  return TensorDescType::get(parser.getContext(), shape, elementType,
+                             encoding.value_or(mlir::Attribute()));
+}
+
+void TensorDescType::print(::mlir::AsmPrinter &printer) const {
+  printer << "<";
+
+  auto shape = getShape();
+  for (int64_t dim : shape) {
+    if (mlir::ShapedType::isDynamic(dim))
+      printer << '?';
+    else
+      printer << dim;
+    printer << 'x';
+  }
+
+  printer << getElementType();
+
+  if (auto encoding = getEncoding())
+    printer << ", " << encoding;
+
+  printer << ">";
+}
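
The parser/printer above round-trips forms like the following (hand-written examples; dynamic dims print as `?` and the encoding is only printed when present):

```mlir
!xegpu.tensor_desc<8x16xf32>
!xegpu.tensor_desc<?x?xf32, #xegpu.tdesc_attr<memory_scope = slm>>
```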
+
 
 } // namespace xegpu
 } // namespace mlir
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 0e89ac4df6ef28..a388db4f5c2dc6 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -6,14 +6,286 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <mlir/Dialect/Utils/StaticValueUtils.h>
 #include <mlir/Dialect/XeGPU/IR/XeGPU.h>
+#include <mlir/Interfaces/ViewLikeInterface.h>
+#include <mlir/IR/Builders.h>
 
 #define DEBUG_TYPE "xegpu"
 
 namespace mlir {
 namespace xegpu {
-// this file is for position occupation,
-// we will add functions in following PRs.
+
+static size_t getRankOf(Value value) {
+  if (value.getType().isIntOrIndexOrFloat())
+    return 0;
+  if (auto ty = llvm::dyn_cast_if_present<MemRefType>(value.getType()))
+    return ty.getRank();
+  if (auto ty = llvm::dyn_cast_if_present<VectorType>(value.getType()))
+    return ty.getRank();
+  llvm_unreachable("Unsupported value for getRankOf");
+}
+
+static void transpose(llvm::ArrayRef<int64_t> trans,
+                      std::vector<int64_t> &shape) {
+  std::vector<int64_t> old = shape;
+  for (size_t i = 0; i < trans.size(); i++)
+    shape[i] = old[trans[i]];
+}
+
+template <typename T>
+static std::string makeString(T array, bool breakline = false) {
+  std::string buf;
+  buf.clear();
+  llvm::raw_string_ostream os(buf);
+  os << "[";
+  for (size_t i = 1; i < array.size(); i++) {
+    os << array[i - 1] << ", ";
+    if (breakline)
+      os << "\n\t\t";
+  }
+  os << array.back() << "]";
+  os.flush();
+  return buf;
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_CreateNdDescOp
+//===----------------------------------------------------------------------===//
+void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
+                           Type TensorDesc, Value source, ValueRange offsets,
+                           ValueRange shape, ValueRange strides,
+                           llvm::ArrayRef<int64_t> static_offsets) {
+  auto offsetRank = static_offsets.size();
+  auto shapeRank = shape.size() ? shape.size() : getRankOf(source);
+
+  size_t dynOffsetRank =
+      std::count_if(static_offsets.begin(), static_offsets.end(),
+                    [](int64_t d) { return ShapedType::isDynamic(d); });
+
+  // shape and strides should exist at the same time
+  // and the final rank for shape and offset (dynamic + static)
+  // should be the same
+  assert(shape.size() == strides.size() && shapeRank == offsetRank &&
+         offsets.size() == dynOffsetRank);
+
+  state.addOperands(source);
+  state.addOperands(offsets);
+  state.addOperands(shape);
+  state.addOperands(strides);
+  state.addAttribute(
+      getOperandSegmentSizesAttrName(state.name),
+      builder.getDenseI32ArrayAttr({1, static_cast<int32_t>(offsets.size()),
+                                    static_cast<int32_t>(shape.size()),
+                                    static_cast<int32_t>(strides.size())}));
+  state.addAttribute(getStaticOffsetsAttrName(state.name),
+                     builder.getDenseI64ArrayAttr(static_offsets));
+  state.addTypes(TensorDesc);
+}
+
+void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
+                           Type tdesc, Value source,
+                           llvm::ArrayRef<OpFoldResult> offsets) {
+  auto ty = llvm::dyn_cast_if_present<MemRefType>(source.getType());
+  assert(ty && ty.hasStaticShape() && offsets.size() == getRankOf(source));
+
+  llvm::SmallVector<int64_t> staticOffsets;
+  llvm::SmallVector<Value> dynamicOffsets;
+  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+
+  build(builder, state, tdesc, source, dynamicOffsets /* dynamic offsets */,
+        ValueRange({}) /* empty dynamic shape */,
+        ValueRange({}) /* empty dynamic strides */,
+        staticOffsets /* static offsets */);
+}
+
+void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
+                           Type tdesc, Value source,
+                           llvm::ArrayRef<OpFoldResult> offsets,
+                           ValueRange shape, ValueRange stride) {
+  assert(shape.size() && offsets.size() && stride.size() &&
+         shape.size() == stride.size() && shape.size() == offsets.size());
+
+  llvm::SmallVector<int64_t> staticOffsets;
+  llvm::SmallVector<Value> dynamicOffsets;
+
+  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
+
+  build(builder, state, tdesc, source, /* dynamic_offsets = */ dynamicOffsets,
+        /* dynamic shape = */ shape , /* dynamic strides = */ stride,
+        /* static offsets = */ staticOffsets);
+}
+
+
+LogicalResult CreateNdDescOp::verify() {
+  auto offsetRank = getOffsets().size();
+  auto shapeRank = getShape().size();
+  auto stridesRank = getStrides().size();
+  auto baseRank = getRankOf(getSource()) ? getRankOf(getSource()) : 2;
+
+  if (offsetRank != shapeRank || shapeRank != stridesRank ||
+      shapeRank != baseRank)
+
+    return emitOpError(
+        "Expecting the ranks of shape, strides, offsets and the memref type "
+        "to match each other (they currently should all be 2D).");
+  return success();
+}
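
A sketch of IR this verifier rejects (hypothetical example; a rank-1 offset list against a rank-2 memref source):

```mlir
// error: the offsets rank (1) does not match the 2D source rank
%td = xegpu.create_nd_tdesc %src[0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
```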
+
+// compute consolidated offsets from dynamic_offsets and static_offsets parameters
+llvm::SmallVector<OpFoldResult> CreateNdDescOp::getOffsets() {
+  llvm::SmallVector<OpFoldResult> offsets;
+  auto dynamicOffsets = getDynamicOffsets(); // dynamic_offsets variable
+  auto staticOffsets = getStaticOffsets();   // static_offsets attribute
+
+  // in case static_offsets is missing, dynamic_offsets will be used
+  if (staticOffsets.size() == 0) {
+    offsets.assign(dynamicOffsets.begin(), dynamicOffsets.end());
+    return offsets;
+  }
+
+  // use static offsets for each dim if it has a valid value, 
+  // otherwise use the value from dynamic_offsets
+  for (size_t i = 0, j = 0; i < staticOffsets.size(); i++) {
+    if (ShapedType::isDynamic(staticOffsets[i])) {
+      assert(j < dynamicOffsets.size());
+      offsets.push_back(dynamicOffsets[j++]);
+    } else {
+      auto ty = IndexType::get(getContext());
+      auto attr = IntegerAttr::get(ty, staticOffsets[i]);
+      offsets.push_back(attr);
+    }
+  }
+  return offsets;
+}
+
+// get the consolidated shape of the 2D memory region. 
+// It prefers dynamic_shape over the static shape of the 
+// memref type.
+llvm::SmallVector<OpFoldResult> CreateNdDescOp::getShape() {
+  llvm::SmallVector<OpFoldResult> shape;
+  auto dynShape = getDynamicShape();
+  if (dynShape.size()) {
+    shape.append(dynShape.begin(), dynShape.end());
+    return shape;
+  }
+
+  auto ty = llvm::dyn_cast_if_present<MemRefType>(getSourceType());
+  if (ty && ty.hasStaticShape()) {
+    for (auto dim : ty.getShape()) {
+      auto attr = IntegerAttr::get(IndexType::get(getContext()), dim);
+      shape.push_back(attr);
+    }
+    return shape;
+  }
+  
+  this->emitError("The shape information of the memory is missing.\n");
+  return {};
+}
+
+// get the consolidated strides of the 2D memory region. 
+// It prefers dynamic_strides over the static strides of the 
+// memref type.
+llvm::SmallVector<OpFoldResult> CreateNdDescOp::getStrides() {
+  llvm::SmallVector<OpFoldResult> strides;
+
+  auto dynStrides = getDynamicStrides();
+  if (dynStrides.size()) {
+    strides.append(dynStrides.begin(), dynStrides.end());
+    return strides;
+  }
+
+  auto ty = llvm::dyn_cast_if_present<MemRefType>(getSourceType());
+  if (ty && ty.hasStaticShape()) {
+    auto [staticStrides, offset] = getStridesAndOffset(ty);
+    for (auto dim : staticStrides) {
+      auto attr = IntegerAttr::get(IndexType::get(getContext()), dim);
+      strides.push_back(attr);
+    }
+    return strides;
+  }
+
+  this->emitError("The strides information of the memory is missing.\n");
+  return {};
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_LoadNDOp
+//===----------------------------------------------------------------------===//
+LogicalResult LoadNDOp::verify() {
+  auto tdescTy = getTensorDescType();
+  auto valueTy = getType();
+
+  if (tdescTy.getRank() != 2)
+    return emitOpError(
+        "The TensorDesc for LoadNDOp should be a 2D TensorDesc.");
+
+  if (!valueTy)
+    return emitOpError("Invalid result, it should be a VectorType.\n");
+
+  auto tdescElemTy = tdescTy.getElementType();
+  auto valueElemTy = valueTy.getElementType();
+
+  if (tdescElemTy != valueElemTy)
+    return emitOpError(
+        "Value should have the same element type as TensorDesc.");
+
+  auto array_len = tdescTy.getArrayLength();
+  auto tdescShape = tdescTy.getShape().vec();
+  auto valueShape = valueTy.getShape().vec();
+
+  if (getTranspose()) {
+    auto trans = getTranspose().value();
+    if (tdescShape.size() >= trans.size())
+      transpose(trans, tdescShape);
+    else
+      emitWarning("Invalid transpose attr. It is ignored.");
+  }
+
+  if (getVnniAxis()) {
+    auto axis = getVnniAxis().value();
+    auto vnni_factor = valueShape.back();
+    tdescShape[axis] /= vnni_factor;
+    tdescShape.push_back(vnni_factor);
+  }
+
+  if (array_len > 1) {
+    auto it = tdescShape.begin();
+    tdescShape.insert(it, array_len);
+  }
+
+  if (tdescShape != valueShape)
+    return emitOpError() <<"Result shape doesn't match TensorDesc shape."
+           << "The expected shape is " << makeString(tdescShape) << ". "
+           << "But the given shape is " << makeString(valueShape) << ".\n";
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// XeGPU_StoreNDOp
+//===----------------------------------------------------------------------===//
+LogicalResult StoreNDOp::verify() {
+  auto dstTy = getTensorDesc().getType(); // Tile
+  // Use dyn_cast (not cast) so that the null check below is meaningful.
+  auto valTy = llvm::dyn_cast<VectorType>(getValue().getType()); // Vector
+
+  if (dstTy.getRank() != 2)
+    return emitOpError("Expecting a 2D TensorDesc shape.\n");
+
+  if (!valTy)
+    return emitOpError("Expecting a VectorType result.\n");
+
+  auto dstElemTy = dstTy.getElementType();
+  auto valElemTy = valTy.getElementType();
+
+  if (dstElemTy != valElemTy) {
+    return emitOpError() << "The element type of the value should "
+                       "match the elementtype of the TensorDesc.\n";
+  }
+
+  if (dstTy.getShape() != valTy.getShape())
+    return emitOpError() << "The result shape should match the TensorDesc shape.\n";
+  return success();
+}
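
And a sketch of a store this verifier rejects (hypothetical shapes):

```mlir
// error: the value shape 8x16 does not match the TensorDesc shape 16x16
xegpu.store_nd %v, %td : vector<8x16xf16>, !xegpu.tensor_desc<16x16xf16>
```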
 
 } // namespace xegpu
 } // namespace mlir
diff --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
new file mode 100644
index 00000000000000..f9b3510beb4335
--- /dev/null
+++ b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
@@ -0,0 +1,62 @@
+// RUN: mlir-opt %s | FileCheck %s
+// Verify the printed output can be parsed.
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// Verify the generic form can be parsed.
+// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s
+
+// CHECK-LABEL: gpu.module @test {
+gpu.module @test {
+// CHECK: gpu.func @test_create_nd_tdesc_vc_1(%[[arg0:.*]]: memref<24x32xf32>) {
+gpu.func @test_create_nd_tdesc_vc_1(%src: memref<24x32xf32>) {
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_nd_tdesc_vc_2(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
+gpu.func @test_create_nd_tdesc_vc_2(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
+  //CHECK: %[[C:.*]] = arith.constant 1 : index
+  %c1 = arith.constant 1 : index
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]], %[[arg4]]], [%[[arg2]], %[[arg1]]], [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_create_nd_tdesc_vc_3(%[[arg0:.*]]: memref<24x32xf32>) {
+gpu.func @test_create_nd_tdesc_vc_3(%src: memref<24x32xf32>) {
+  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr<array_length = 2 : i64>>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr<array_length = 2>>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_prefetch_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @test_prefetch_nd_vc(%src: memref<24x32xf16>) {
+  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
+  // CHECK: xegpu.prefetch_nd %[[R0]] {l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>} : !xegpu.tensor_desc<8x16xf16>
+  xegpu.prefetch_nd %1 {l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}: !xegpu.tensor_desc<8x16xf16>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_load_nd_vc(%[[arg0:.*]]: memref<8x16xf16>) {
+gpu.func @test_load_nd_vc(%src: memref<8x16xf16>) {
+  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
+  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] {l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, vnni_axis = 0 : i64} : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16>
+  %2 = xegpu.load_nd %1 {vnni_axis = 0, l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>} 
+       : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16>
+  gpu.return
+}
+
+// CHECK: gpu.func @test_store_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) {
+gpu.func @test_store_nd_vc(%dst: memref<24x32xf16>) {
+  // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<24x32xf16>
+  %1 = arith.constant dense<1.0>: vector<24x32xf16>
+  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
+  %2 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
+  // CHECK: xegpu.store_nd %[[C]], %[[R0]] {l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>} : vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16>
+  xegpu.store_nd %1, %2 {l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>} : vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16>
+  gpu.return
+}
+
+}
\ No newline at end of file

From 74bd038f61985874694c01023c16f04e070e1419 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Sun, 10 Mar 2024 20:38:46 +0000
Subject: [PATCH 2/5] run clang-format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp |  5 ++--
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp     | 33 ++++++++++++----------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index bd72d5c17b6ea1..43337a6ab43dcd 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -29,7 +29,6 @@ void XeGPUDialect::initialize() {
       >();
 }
 
-
 //===----------------------------------------------------------------------===//
 // XeGPU_TensorDescAttr
 //===----------------------------------------------------------------------===//
@@ -62,7 +61,8 @@ mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) {
   if (mlir::succeeded(parser.parseOptionalComma())) {
     encoding = mlir::FieldParser<mlir::Attribute>::parse(parser);
     if (mlir::failed(encoding)) {
-      parser.emitError(parser.getCurrentLocation(),
+      parser.emitError(
+          parser.getCurrentLocation(),
           "Failed to parse the attribute field for TensorDescType.\n");
       return {};
     }
@@ -96,7 +96,6 @@ void TensorDescType::print(::mlir::AsmPrinter &printer) const {
   printer << ">";
 }
 
-
 } // namespace xegpu
 } // namespace mlir
 
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index a388db4f5c2dc6..be631c4678eacb 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -8,8 +8,8 @@
 
 #include <mlir/Dialect/Utils/StaticValueUtils.h>
 #include <mlir/Dialect/XeGPU/IR/XeGPU.h>
-#include <mlir/Interfaces/ViewLikeInterface.h>
 #include <mlir/IR/Builders.h>
+#include <mlir/Interfaces/ViewLikeInterface.h>
 
 #define DEBUG_TYPE "xegpu"
 
@@ -112,11 +112,10 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
   dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
 
   build(builder, state, tdesc, source, /* dynamic_offsets = */ dynamicOffsets,
-        /* dynamic shape = */ shape , /* dynamic strides = */ stride,
+        /* dynamic shape = */ shape, /* dynamic strides = */ stride,
         /* static offsets = */ staticOffsets);
 }
 
-
 LogicalResult CreateNdDescOp::verify() {
   auto offsetRank = getOffsets().size();
   auto shapeRank = getShape().size();
@@ -132,7 +131,8 @@ LogicalResult CreateNdDescOp::verify() {
   return success();
 }
 
-// compute consolidated offsets from dynamic_offsets and static_offsets parameters
+// compute consolidated offsets from dynamic_offsets and static_offsets
+// parameters
 llvm::SmallVector<OpFoldResult> CreateNdDescOp::getOffsets() {
   llvm::SmallVector<OpFoldResult> offsets;
   auto dynamicOffsets = getDynamicOffsets(); // dynamic_offsets variable
@@ -144,7 +144,7 @@ llvm::SmallVector<OpFoldResult> CreateNdDescOp::getOffsets() {
     return offsets;
   }
 
-  // use static offsets for each dim if it has a valid value, 
+  // use static offsets for each dim if it has a valid value,
   // otherwise use the value from dynamic_offsets
   for (size_t i = 0, j = 0; i < staticOffsets.size(); i++) {
     if (ShapedType::isDynamic(staticOffsets[i])) {
@@ -159,8 +159,8 @@ llvm::SmallVector<OpFoldResult> CreateNdDescOp::getOffsets() {
   return offsets;
 }
 
-// get the consolidated shape of the 2D memory region. 
-// It prefers dynamic_shape over the static shape of the 
+// get the consolidated shape of the 2D memory region.
+// It prefers dynamic_shape over the static shape of the
 // memref type.
 llvm::SmallVector<OpFoldResult> CreateNdDescOp::getShape() {
   llvm::SmallVector<OpFoldResult> shape;
@@ -178,13 +178,13 @@ llvm::SmallVector<OpFoldResult> CreateNdDescOp::getShape() {
     }
     return shape;
   }
-  
+
   this->emitError("The shape information of the memory is missing.\n");
   return {};
 }
 
-// get the consolidated strides of the 2D memory region. 
-// It prefers dynamic_strides over the static strides of the 
+// get the consolidated strides of the 2D memory region.
+// It prefers dynamic_strides over the static strides of the
 // memref type.
 llvm::SmallVector<OpFoldResult> CreateNdDescOp::getStrides() {
   llvm::SmallVector<OpFoldResult> strides;
@@ -255,9 +255,11 @@ LogicalResult LoadNDOp::verify() {
   }
 
   if (tdescShape != valueShape)
-    return emitOpError() <<"Result shape doesn't match TensorDesc shape."
-           << "The expected shape is " << makeString(tdescShape) << ". "
-           << "But the given shape is " << makeString(valueShape) << ".\n";
+    return emitOpError() << "Result shape doesn't match TensorDesc shape."
+                         << "The expected shape is " << makeString(tdescShape)
+                         << ". "
+                         << "But the given shape is " << makeString(valueShape)
+                         << ".\n";
   return success();
 }
 
@@ -279,11 +281,12 @@ LogicalResult StoreNDOp::verify() {
 
   if (dstElemTy != valElemTy) {
     return emitOpError() << "The element type of the value should "
-                       "match the elementtype of the TensorDesc.\n";
+                            "match the elementtype of the TensorDesc.\n";
   }
 
   if (dstTy.getShape() != valTy.getShape())
-    return emitOpError() << "The result shape should match the TensorDesc shape.\n";
+    return emitOpError()
+           << "The result shape should match the TensorDesc shape.\n";
   return success();
 }
 

>From ff338280afdd024f7bc95417f947f64266f6b90e Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 12 Mar 2024 16:19:14 +0000
Subject: [PATCH 3/5] add ViewLikeOpInterface and
 OffsetSizeAndStrideOpInterface to createNdDescOp

---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h    |  11 +-
 .../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 121 ++++++------
 .../mlir/Dialect/XeGPU/IR/XeGPUTypes.td       |   8 +-
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp        | 174 ++++--------------
 4 files changed, 113 insertions(+), 201 deletions(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 8dc3ff78d25ede..87aabdc015fea5 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -9,11 +9,12 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
 #define MLIR_DIALECT_XEGPU_IR_XEGPU_H
 
-#include <mlir/Bytecode/BytecodeOpInterface.h>
-#include <mlir/IR/BuiltinTypes.h>
-#include <mlir/IR/Dialect.h>
-#include <mlir/Interfaces/ShapedOpInterfaces.h>
-#include <mlir/Interfaces/SideEffectInterfaces.h>
+#include "mlir/Bytecode/BytecodeOpInterface.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/Interfaces/ShapedOpInterfaces.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/ViewLikeInterface.h"
 
 namespace mlir {
 namespace xegpu {
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 9d37d77e03a0c5..24fae9596994e3 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -9,26 +9,13 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 #define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 
+include "mlir/IR/AttrTypeBase.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
-include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/IR/AttrTypeBase.td"
-
-
-include "mlir/IR/OpBase.td"
-include "mlir/IR/OpAsmInterface.td"
-include "mlir/IR/AttrTypeBase.td"
-include "mlir/IR/BuiltinTypes.td"
-include "mlir/IR/BuiltinTypeInterfaces.td"
+include "mlir/Interfaces/ShapedOpInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
-include "mlir/Interfaces/CastInterfaces.td"
-include "mlir/Interfaces/ControlFlowInterfaces.td"
-include "mlir/Interfaces/CopyOpInterface.td"
-include "mlir/Interfaces/InferTypeOpInterface.td"
-include "mlir/Interfaces/ShapedOpInterfaces.td"
-
 
 // Base class for dialect operations. This operation inherits from the base
 // `Op` class in OpBase.td, and provides:
@@ -39,12 +26,13 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
           Op<XeGPU_Dialect, mnemonic, traits>;
 
 
-def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSegments]> {
+def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, 
+                        AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
 
   let summary = "create nd tensor descriptor operation";
   let description = [{
     The "create_nd_tdesc" operation creates a TensorDescType which represents
-    a sub-view of a 2D memory region (it can be extended to support N-D memory
+    a sub-view of a 2D memory region (it can be extended to support n-D memory
     regions if needed in the future). Elements in the subview are contiguous in
     each dimension. It encodes the following important information for supporting
     Intel hardware features:
@@ -90,31 +78,27 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSeg
 
   let arguments = (ins 
     XeGPU_BaseAddrType: $source, 
-    Variadic<Index>: $dynamic_offsets, 
-    Variadic<Index>: $dynamic_shape, 
-    Variadic<Index>: $dynamic_strides,
+    Variadic<Index>: $offsets, 
+    Variadic<Index>: $shape, 
+    Variadic<Index>: $strides,
     DenseI64ArrayAttr: $static_offsets
   );
   let results = (outs XeGPU_TensorDesc: $TensorDesc);
 
   let assemblyFormat = [{
     $source ``
-    custom<DynamicIndexList>($dynamic_offsets, $static_offsets)
-    (`,` `[` $dynamic_shape^ `]` `,` `[` $dynamic_strides `]`)?
+    custom<DynamicIndexList>($offsets, $static_offsets)
+    (`,` `[` $shape^ `]` `,` `[` $strides `]`)?
     attr-dict `:` type($source) `->` qualified(type($TensorDesc))
   }];
-  let skipDefaultBuilders = 1;
+
   let hasVerifier = 1;
 
   let builders = [
-    OpBuilder<(ins "Type": $TensorDesc, "Value": $source, "ValueRange": $offsets, 
-                   "ValueRange": $shape, "ValueRange": $strides, 
-                   "llvm::ArrayRef<int64_t>": $static_offsets)>,
-
-    OpBuilder<(ins "Type": $tdesc, "Value": $source, 
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source, 
                    "llvm::ArrayRef<OpFoldResult>": $offsets)>,
 
-    OpBuilder<(ins "Type": $tdesc, "Value": $source, 
+    OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source, 
                    "llvm::ArrayRef<OpFoldResult>": $offsets,
                    "ValueRange": $shape, "ValueRange": $stride)>
   ];
@@ -130,27 +114,6 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSeg
       return getTensorDesc().getType();
     }
 
-    /// Returns the offsets info of the source. It consolidates
-    /// information from both the dynamic_offsets and static_offsets
-    /// parameters. The static_offsets parameter always has the expected
-    /// rank, where some dims may have the ShapedType::kDynamic value,
-    /// indicating that the corresponding value comes from dynamic_offsets.
-    llvm::SmallVector<OpFoldResult> getOffsets();
-
-    /// Returns the shape info of the source. It comes either from the
-    /// memref type, if the source is a memref with static shape
-    /// information, or from the dynamic_shape parameter. If both
-    /// exist, the dynamic_shape parameter is used and the shape
-    /// information from the memref type is ignored.
-    llvm::SmallVector<OpFoldResult> getShape();
-
-    /// Returns the strides info of the source. It comes either from the
-    /// memref type, if the source is a memref with static shape
-    /// information, or from the dynamic_strides parameter. If both
-    /// exist, the dynamic_strides parameter is used and the strides
-    /// information from the memref type is ignored.
-    llvm::SmallVector<OpFoldResult> getStrides();
-
     /// Return the element type of the TensorDesc
     Type getElementType() {
       return getType().getElementType();
@@ -160,6 +123,58 @@ def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSeg
     llvm::ArrayRef<int64_t> getTensorDescShape() {
       return getType().getShape();
     }
+
+    /// wrapper for matching with OffsetSizeAndStrideOpInterface
+    OperandRange getSizes() {
+      return getShape();
+    }
+
+    /// wrapper for matching with OffsetSizeAndStrideOpInterface
+    /// If the source is an IntegerType or `shape` is filled, it returns
+    /// an array of ShapedType::kDynamic, meaning the dynamic shape
+    /// encoded in the `shape` argument will be used. The presence of
+    /// `shape` overrides the static shape from the source memref type.
+    SmallVector<int64_t> getStaticSizes() {
+      if (getSourceType().isa<IntegerType>() || getShape().size()) {
+        auto dims = getMixedOffsets().size();
+        return SmallVector<int64_t>(dims, ShapedType::kDynamic);
+      }
+      auto memrefType = getSourceType().dyn_cast<MemRefType>();
+      return SmallVector<int64_t>(memrefType.getShape());
+    }
+
+    /// wrapper for matching with OffsetSizeAndStrideOpInterface
+    /// If the source is an IntegerType or `strides` is filled, it returns
+    /// an array of ShapedType::kDynamic, meaning the dynamic strides
+    /// encoded in the `strides` argument will be used. The presence of
+    /// `strides` overrides the static strides from the source memref type.
+    SmallVector<int64_t> getStaticStrides() {
+      if (getSourceType().isa<IntegerType>() || getStrides().size()) {
+        auto dims = getMixedOffsets().size();
+        return SmallVector<int64_t>(dims, ShapedType::kDynamic);
+      }
+      auto memrefType = getSourceType().dyn_cast<MemRefType>();
+      auto [strides, offset] = getStridesAndOffset(memrefType);
+      return strides;
+    }
+
+    /// Return the expected rank of each of the `static_offsets`, 
+    /// `static_shape` and `static_strides` attributes.
+    std::array<unsigned, 3> getArrayAttrMaxRanks() {
+      unsigned rank;
+      if (auto ty = getSourceType().dyn_cast<MemRefType>()) {
+        rank = ty.getRank();
+      } else {
+        rank = (unsigned)getMixedOffsets().size();
+      }
+      return {rank, rank, rank};
+    }
+    
+    /// Return the number of leading operands before the `offsets`, 
+    /// `shape` and `strides` operands.
+    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
+
+    mlir::Value getViewSource() { return getSource(); }
   }];
 }
 
@@ -178,11 +193,11 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
 }
 
 
-def XeGPU_LoadNDOp : XeGPU_Op<"load_nd"> {
+def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> {
   let summary = "loads a n-D block from memory (represented by TensorDesc)" 
                 "to registers (represented by vector)";
   let description = [{
-    LoadNDOp essentially mimics the hardware block read instruction to read 
+    LoadNdOp essentially mimics the hardware block read instruction to read 
     a block of data from memory to registers. It takes a set of cache hints 
     for each level of cache, L1, L2 and L3. If the hardware does not have a 
     corresponding cache, the corresponding cache hint attribute will be masked.
@@ -218,7 +233,7 @@ def XeGPU_LoadNDOp : XeGPU_Op<"load_nd"> {
   let hasVerifier = 1;
 }
 
-def XeGPU_StoreNDOp : XeGPU_Op<"store_nd", []> {
+def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> {
   let summary = "stores a n-D block register region back to memory, currently only supports 2D";
   let arguments = (ins XeGPU_ValueType: $value,
                        XeGPU_TensorDesc: $TensorDesc,
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 36b04ea12bcad0..19ac1693712dd8 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -37,12 +37,12 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
     TensorDesc is a type designed to describe regions of interest in the data, as well as some 
     features that are unique to Intel hardware. Unlike the builtin tensor type in MLIR, 
     it essentially only contains the metadata and doesn't hold the data by itself. It is designed 
-    to mainly support 2d block load/store and DPAS (matrix multiplication instruction) on Intel GPUs. 
+    to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPUs. 
     It encodes the following information:
 
     * shape:  the sizes/shape of the data block of interest, e.g., 8x16 means 8 rows
-              and each row contains 16 continious data element. The rows could be
-              either continuous or not, depends on whether the encoding attribute
+              and each row contains 16 contiguous data elements. The rows could be
+              either contiguous or not, depending on whether the encoding attribute
               is set or not.
     * element_type: the data type of the data element, e.g., f16, f32.
 
@@ -50,7 +50,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
     the following information via the TensorDescAttr object:
     * memory_scope (xegpu::MemoryScope): [optional] where the data is located, 
                 global memory or shared memory. It is default to Global.
-    * array_length (int): [optional] The number of continuous blocks with size as `shape`,
+    * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
                that will be loaded by block load at a time. It is default to 1.
     * boundary_check (bool): [optional] indicates whether the operation detects the boundary 
                 and pads with zeros for out-of-bounds accesses. It defaults to true (boundary check enabled).
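
Putting the pieces above together, a fully-annotated TensorDesc type
could be spelled roughly as follows; the attribute field spellings are
a sketch based on the optional parameters just described, and may not
match the printer's exact output:

  // Sketch: an 8x16 block of f16 in shared local memory (slm), with two
  // contiguous blocks per load and boundary checking enabled.
  !xegpu.tensor_desc<8x16xf16,
    #xegpu.tdesc_attr<memory_scope = slm, array_length = 2 : i64,
                      boundary_check = true>>
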
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index be631c4678eacb..08723d12c278c8 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -9,23 +9,12 @@
 #include <mlir/Dialect/Utils/StaticValueUtils.h>
 #include <mlir/Dialect/XeGPU/IR/XeGPU.h>
 #include <mlir/IR/Builders.h>
-#include <mlir/Interfaces/ViewLikeInterface.h>
 
 #define DEBUG_TYPE "xegpu"
 
 namespace mlir {
 namespace xegpu {
 
-static size_t getRankOf(Value value) {
-  if (value.getType().isIntOrIndexOrFloat())
-    return 0;
-  if (auto ty = llvm::dyn_cast_if_present<MemRefType>(value.getType()))
-    return ty.getRank();
-  if (auto ty = llvm::dyn_cast_if_present<VectorType>(value.getType()))
-    return ty.getRank();
-  llvm_unreachable("Unsupported value for getRankOf");
-}
-
 static void transpose(llvm::ArrayRef<int64_t> trans,
                       std::vector<int64_t> &shape) {
   std::vector<int64_t> old = shape;
@@ -53,41 +42,10 @@ static std::string makeString(T array, bool breakline = false) {
 // XeGPU_CreateNdDescOp
 //===----------------------------------------------------------------------===//
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
-                           Type TensorDesc, Value source, ValueRange offsets,
-                           ValueRange shape, ValueRange strides,
-                           llvm::ArrayRef<int64_t> static_offsets) {
-  auto offsetRank = static_offsets.size();
-  auto shapeRank = shape.size() ? shape.size() : getRankOf(source);
-
-  size_t dynOffsetRank =
-      std::count_if(static_offsets.begin(), static_offsets.end(),
-                    [](int64_t d) { return ShapedType::isDynamic(d); });
-
-  // shape and strides should exists at the same time
-  // and the final rank for shape and offset (dynamic + static)
-  // should be the same
-  assert(shape.size() == strides.size() && shapeRank == offsetRank &&
-         offsets.size() == dynOffsetRank);
-
-  state.addOperands(source);
-  state.addOperands(offsets);
-  state.addOperands(shape);
-  state.addOperands(strides);
-  state.addAttribute(
-      getOperandSegmentSizesAttrName(state.name),
-      builder.getDenseI32ArrayAttr({1, static_cast<int32_t>(offsets.size()),
-                                    static_cast<int32_t>(shape.size()),
-                                    static_cast<int32_t>(strides.size())}));
-  state.addAttribute(getStaticOffsetsAttrName(state.name),
-                     builder.getDenseI64ArrayAttr(static_offsets));
-  state.addTypes(TensorDesc);
-}
-
-void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
-                           Type tdesc, Value source,
+                           Type tdesc, TypedValue<MemRefType> source,
                            llvm::ArrayRef<OpFoldResult> offsets) {
-  auto ty = llvm::dyn_cast_if_present<MemRefType>(source.getType());
-  assert(ty && ty.hasStaticShape() && offsets.size() == getRankOf(source));
+  auto ty = source.getType();
+  assert(ty && ty.hasStaticShape() && offsets.size() == (size_t)ty.getRank());
 
   llvm::SmallVector<int64_t> staticOffsets;
   llvm::SmallVector<Value> dynamicOffsets;
@@ -100,7 +58,7 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
 }
 
 void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
-                           Type tdesc, Value source,
+                           Type tdesc, TypedValue<IntegerType> source,
                            llvm::ArrayRef<OpFoldResult> offsets,
                            ValueRange shape, ValueRange stride) {
   assert(shape.size() && offsets.size() && stride.size() &&
@@ -117,108 +75,47 @@ void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
 }
 
 LogicalResult CreateNdDescOp::verify() {
-  auto offsetRank = getOffsets().size();
-  auto shapeRank = getShape().size();
-  auto stridesRank = getStrides().size();
-  auto baseRank = getRankOf(getSource()) ? getRankOf(getSource()) : 2;
-
-  if (offsetRank != shapeRank || shapeRank != stridesRank ||
-      shapeRank != baseRank)
-
-    return emitOpError(
-        "Expecting the rank of shape, strides, offsets and memref type "
-        "should match with each other (they currently should be 2D).");
-  return success();
-}
-
-// compute consolidated offsets from dynamic_offsets and static_offsets
-// parameters
-llvm::SmallVector<OpFoldResult> CreateNdDescOp::getOffsets() {
-  llvm::SmallVector<OpFoldResult> offsets;
-  auto dynamicOffsets = getDynamicOffsets(); // dynamic_offsets variable
-  auto staticOffsets = getStaticOffsets();   // static_offsets attribute
-
-  // in case static_offsets is missing, dynamic_offsets will be used
-  if (staticOffsets.size() == 0) {
-    offsets.assign(dynamicOffsets.begin(), dynamicOffsets.end());
-    return offsets;
-  }
-
-  // use static offsets for each dim if it has valid value,
-  // othwise use the value from dynamic_offsets
-  for (size_t i = 0, j = 0; i < staticOffsets.size(); i++) {
-    if (ShapedType::isDynamic(staticOffsets[i])) {
-      assert(j < dynamicOffsets.size());
-      offsets.push_back(dynamicOffsets[j++]);
-    } else {
-      auto ty = IndexType::get(getContext());
-      auto attr = IntegerAttr::get(ty, staticOffsets[i]);
-      offsets.push_back(attr);
-    }
-  }
-  return offsets;
-}
-
-// get the consolidated shape of the 2D memory region.
-// It prefer dynamic_shape than the static shape of
-// memref type.
-llvm::SmallVector<OpFoldResult> CreateNdDescOp::getShape() {
-  llvm::SmallVector<OpFoldResult> shape;
-  auto dynShape = getDynamicShape();
-  if (dynShape.size()) {
-    shape.append(dynShape.begin(), dynShape.end());
-    return shape;
-  }
-
-  auto ty = llvm::dyn_cast_if_present<MemRefType>(getSourceType());
-  if (ty && ty.hasStaticShape()) {
-    for (auto dim : ty.getShape()) {
-      auto attr = IntegerAttr::get(IndexType::get(getContext()), dim);
-      shape.push_back(attr);
-    }
-    return shape;
+  auto rank = (int64_t)getMixedOffsets().size();
+  bool invalidRank = (rank != 2);
+  bool invalidElemTy = false;
+
+  // Check that the source type matches the rank if it is a memref.
+  // It should also have the same element type as the TensorDesc.
+  auto memrefTy = getSourceType().dyn_cast<MemRefType>();
+  if (memrefTy) {
+    invalidRank |= (memrefTy.getRank() != rank);
+    invalidElemTy |= memrefTy.getElementType() != getElementType();
   }
 
-  this->emitError("The shape information of the memory is missing.\n");
-  return {};
-}
-
-// get the consolidated strides of the 2D memory region.
-// It prefer dynamic_stride than the static strides of
-// memref type.
-llvm::SmallVector<OpFoldResult> CreateNdDescOp::getStrides() {
-  llvm::SmallVector<OpFoldResult> strides;
+  // check result type matches the rank
+  invalidRank |= (getType().getRank() != rank);
 
-  auto dynStrides = getDynamicStrides();
-  if (dynStrides.size()) {
-    strides.append(dynStrides.begin(), dynStrides.end());
-    return strides;
-  }
+  // Mismatches among shape, strides, and offsets are
+  // already handled by OffsetSizeAndStrideOpInterface,
+  // so they are not checked here.
+  if (invalidRank)
+    return emitOpError(
+        "Expecting the rank of shape, strides, offsets, the "
+        "source memref type (if the source is a memref), and the "
+        "TensorDesc to match with each other. They currently must be 2D.");
 
-  auto ty = llvm::dyn_cast_if_present<MemRefType>(getSourceType());
-  if (ty && ty.hasStaticShape()) {
-    auto [staticStrides, offset] = getStridesAndOffset(ty);
-    for (auto dim : staticStrides) {
-      auto attr = IntegerAttr::get(IndexType::get(getContext()), dim);
-      strides.push_back(attr);
-    }
-    return strides;
-  }
+  if (invalidElemTy)
+    return emitOpError("TensorDesc should have the same element "
+                       "type as the source if the source is a memref.\n");
 
-  this->emitError("The strides information of the memory is missing.\n");
-  return {};
+  return success();
 }
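
To make the verifier's contract concrete, here is a minimal sketch of
IR it accepts and IR it rejects; the op syntax is an assumption pieced
together from the builders above:

  // Accepted: 2D offsets, a 2D source memref, a 2D TensorDesc, and
  // matching f32 element types.
  %0 = xegpu.create_nd_tdesc %src[0, 0]
       : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>

  // Rejected: element type mismatch (f32 memref vs. f16 TensorDesc).
  %1 = xegpu.create_nd_tdesc %src[0, 0]
       : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf16>
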
 
 //===----------------------------------------------------------------------===//
-// XeGPU_LoadNDOp
+// XeGPU_LoadNdOp
 //===----------------------------------------------------------------------===//
-LogicalResult LoadNDOp::verify() {
+LogicalResult LoadNdOp::verify() {
   auto tdescTy = getTensorDescType();
   auto valueTy = getType();
 
   if (tdescTy.getRank() != 2)
     return emitOpError(
-        "The TensorDesc for LoadNDOp should be a 2D TensorDesc.");
+        "The TensorDesc for LoadNdOp should be a 2D TensorDesc.");
 
   if (!valueTy)
     return emitOpError("Invalid result, it should be a VectorType.\n");
@@ -257,16 +154,15 @@ LogicalResult LoadNDOp::verify() {
   if (tdescShape != valueShape)
     return emitOpError() << "Result shape doesn't match TensorDesc shape."
                          << "The expected shape is " << makeString(tdescShape)
-                         << ". "
-                         << "But the given shape is " << makeString(valueShape)
-                         << ".\n";
+                         << ". But the given shape is "
+                         << makeString(valueShape) << ".\n";
   return success();
 }
 
 //===----------------------------------------------------------------------===//
-// XeGPU_StoreNDOp
+// XeGPU_StoreNdOp
 //===----------------------------------------------------------------------===//
-LogicalResult StoreNDOp::verify() {
+LogicalResult StoreNdOp::verify() {
   auto dstTy = getTensorDesc().getType();               // Tile
   auto valTy = getValue().getType().cast<VectorType>(); // Vector
 

>From 8a9df4ba16cdaf422d7980f565f3c2046b141a90 Mon Sep 17 00:00:00 2001
From: Chao Chen <chao.chen at intel.com>
Date: Tue, 12 Mar 2024 20:03:34 -0500
Subject: [PATCH 4/5] fix include format

---
 mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 8 ++++----
 mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp     | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 43337a6ab43dcd..0b3f4b9c9dbeae 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <llvm/ADT/TypeSwitch.h>
-#include <mlir/Dialect/XeGPU/IR/XeGPU.h>
-#include <mlir/IR/Builders.h>
-#include <mlir/IR/DialectImplementation.h>
+#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/DialectImplementation.h"
+#include "llvm/ADT/TypeSwitch.h"
 
 namespace mlir {
 namespace xegpu {
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 08723d12c278c8..3a75b173b757c5 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <mlir/Dialect/Utils/StaticValueUtils.h>
-#include <mlir/Dialect/XeGPU/IR/XeGPU.h>
-#include <mlir/IR/Builders.h>
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
+#include "mlir/IR/Builders.h"
 
 #define DEBUG_TYPE "xegpu"
 

>From e3857bb9971b1570108ec55b97f2a18d44354cdc Mon Sep 17 00:00:00 2001
From: Chao Chen <116223022+chencha3 at users.noreply.github.com>
Date: Thu, 14 Mar 2024 10:13:19 -0500
Subject: [PATCH 5/5] Update mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td

Co-authored-by: Mehdi Amini <joker.eph at gmail.com>
---
 mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 24fae9596994e3..cb768d5e6b9af3 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -29,7 +29,7 @@ class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
 def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, 
                         AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
 
-  let summary = "create nd tensor descriptor operation";
+  let summary = "Create nd-tensor descriptor operation";
   let description = [{
     The "create_nd_tdesc" operation creates a TensorDescType which represents
     a sub-view of a 2D memory region (It can be extended to support n-D memory
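
Since one of the CreateNdDescOp builders earlier in this series takes a
TypedValue<IntegerType> source plus explicit shape and stride operands,
a plain-address variant might look roughly like this; the ui64 source
type and the bracketed operand groups are assumptions for illustration:

  // Sketch: %ptr is a raw address, so the 2D region's shape [%h, %w]
  // and strides [%w, %c1] must be supplied explicitly, since there is
  // no memref type to infer them from.
  %0 = xegpu.create_nd_tdesc %ptr[%x, %y], [%h, %w], [%w, %c1]
       : ui64 -> !xegpu.tensor_desc<8x16xf32>
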


