[Mlir-commits] [mlir] 8d14204 - Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (#84692)" (#85653)

Mon Mar 18 09:28:02 PDT 2024

Author: Balaji V. Iyer
Date: 2024-03-18T11:27:58-05:00
New Revision: 8d142043e8c4c1144cd94f39f3cc7c88da5cec3f

URL: https://github.com/llvm/llvm-project/commit/8d142043e8c4c1144cd94f39f3cc7c88da5cec3f
DIFF: https://github.com/llvm/llvm-project/commit/8d142043e8c4c1144cd94f39f3cc7c88da5cec3f.diff

LOG: Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (#84692)" (#85653)

This reverts commit daebe5c4f27ba140ac8d13abf41e3fe4db72b91a.

This commit causes the following asan issue:

```
<snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir | <snip>/llvm-project/build/bin/FileCheck <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# executed command: <snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# .---command stderr------------
# | =================================================================
# | ==2772558==ERROR: AddressSanitizer: stack-use-after-return on address 0x7fd2c2c42b90 at pc 0x55e406d54614 bp 0x7ffc810e4070 sp 0x7ffc810e4068
# | READ of size 8 at 0x7fd2c2c42b90 thread T0
# |     #0 0x55e406d54613 in operator()<long int const*> /usr/include/c++/13/bits/predefined_ops.h:318
# |     #1 0x55e406d54613 in __count_if<long int const*, __gnu_cxx::__ops::_Iter_pred<mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > > /usr/include/c++/13/bits/stl_algobase.h:2125
# |     #2 0x55e406d54613 in count_if<long int const*, mlir::verifyListOfOperandsOrIntegers(Operation*, 
...
```

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
    mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
    mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
    mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
    mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
    mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
    mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp

Removed: 
    mlir/test/Dialect/XeGPU/XeGPUOps.mlir


################################################################################
diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 87aabdc015fea5..7aaa4ecc7ee77a 100644

--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -9,12 +9,7 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
 #define MLIR_DIALECT_XEGPU_IR_XEGPU_H
 
-#include "mlir/Bytecode/BytecodeOpInterface.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Dialect.h"
-#include "mlir/Interfaces/ShapedOpInterfaces.h"
-#include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Interfaces/ViewLikeInterface.h"
+#include <mlir/IR/Dialect.h>
 
 namespace mlir {
 namespace xegpu {

diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index cd38549f1ccf43..bb325c272e3324 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -10,7 +10,6 @@
 #define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
 
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
-include "mlir/IR/EnumAttr.td"
 
 class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
                 string baseCppClass = "::mlir::Attribute">
@@ -18,64 +17,4 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
   let mnemonic = attrMnemonic;
 }
 
-def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
-  let parameters = (ins
-    OptionalParameter<"MemoryScopeAttr">: $memory_scope,
-    OptionalParameter<"IntegerAttr", "1">: $array_length,
-    OptionalParameter<"BoolAttr", "true">: $boundary_check
-  );
-
-  let builders = [
-    AttrBuilder<(ins
-      CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
-      CArg<"int", "1">:$array_length,
-      CArg<"bool", "true">: $boundary_check
-    )>
-  ];
-
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU Memory Scope Enums.
-//===----------------------------------------------------------------------===//
-def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
-def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
-def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope", 
-      "The address space of the memory the tensor descritor is created for", 
-      [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::xegpu";
-}
-
-def XeGPU_MemoryScopeAttr: 
-  EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
-    let assemblyFormat = "$value";
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU Cache Enums.
-//===----------------------------------------------------------------------===//
-def XeGPU_CachePolicyCached:        I32EnumAttrCase<"CACHED", 0, "cached">;                    // valid for read and write
-def XeGPU_CachePolicyUncached:      I32EnumAttrCase<"UNCACHED", 1, "uncached">;                // valid for read and write
-def XeGPU_CachePolicyStreaming:     I32EnumAttrCase<"STREAMING", 2, "streaming">;              // valid for read only
-def XeGPU_CachePolicyInvalid:       I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">;  // valid for read only
-def XeGPU_CachePolicyWriteBack:     I32EnumAttrCase<"WRITE_BACK", 4, "write_back">;            // valid for write only
-def XeGPU_CachePolicyWriteThrough:  I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">;      // valid for write only
-
-def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy", 
-  [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached, 
-   XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid,
-   XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> {
-  let genSpecializedAttr = 0;
-  let cppNamespace = "::mlir::xegpu";
-}
-
-def XeGPU_CacheHintAttr 
-  : EnumAttr<XeGPU_Dialect, XeGPU_CachePolicyEnums, "cache_hint"> {
-    let assemblyFormat = "`<` $value `>`";
-}
-
-
-
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD

diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index c2f09319c790e0..3851275ad30a0a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -23,8 +23,8 @@ def XeGPU_Dialect : Dialect {
       the lower-level GPU compiler.
     }];
 
-    let useDefaultTypePrinterParser = true;
-    let useDefaultAttributePrinterParser = true;
+    // let useDefaultTypePrinterParser = true;
+    // let useDefaultAttributePrinterParser = true;
 }
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD

diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 1f90dcb4bf55ad..5825ef9195b03f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -9,13 +9,10 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 #define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
 
-include "mlir/IR/AttrTypeBase.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
-include "mlir/Interfaces/ShapedOpInterfaces.td"
-include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/ViewLikeInterface.td"
+
 
 // Base class for dialect operations. This operation inherits from the base
 // `Op` class in OpBase.td, and provides:
@@ -23,291 +20,7 @@ include "mlir/Interfaces/ViewLikeInterface.td"
 //   * The mnemonic for the operation, or the name without the dialect prefix.
 //   * A list of traits for the operation.
 class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
-          Op<XeGPU_Dialect, mnemonic, traits> {
-
-  code extraBaseClassDeclaration = [{
-    void printProperties(::mlir::MLIRContext *ctx,
-            ::mlir::OpAsmPrinter &p, const Properties &prop) {
-      Attribute propAttr = getPropertiesAsAttr(ctx, prop);
-      if (propAttr)
-        p << "<" << propAttr << ">";
-    }
-
-    static ::mlir::ParseResult parseProperties(::mlir::OpAsmParser &parser,
-                                     ::mlir::OperationState &result) {
-      if (mlir::succeeded(parser.parseLess())) {
-        if (parser.parseAttribute(result.propertiesAttr) || parser.parseGreater())
-          return failure();
-      }
-      return success();
-    }
-
-  }];
-}
-
-
-def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface, 
-                        AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
-
-  let summary = "Create nd-tensor descriptor operation";
-  let description = [{
-    The "create_nd_tdesc" operation creates a TensorDescType which represents
-    a sub-view of a 2D memory region (It can be extended to support n-D memory
-    region if needed in future). Elements in the subview continuous in each 
-    dimention. It encodes the following important information for supporting 
-    Intel hardware features:
-
-    * source: an object representing (starting address/pointer of) a 2D memory region. 
-        It can be either a 2D memref object, or simply a pointer represented by uint64_t type.
-        for the later case, the shape and layout information of the 2D memory region should 
-        be explicitly passed via `dynamic_shape` and `dynamic_strides` parameters.
-    * offsets: two index values represents offsets from the "source" at the each dimension 
-        at which the subview of the target memory will be created. It is encoded via two
-        variables, including "dynamic_offsets" and "static_offsets", such that it can
-        accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])).
-    * shape: the shape information of the memory region pointed by the "source".  It is 
-        typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. 
-        But if "source" is simply a pointer represented as uint64_t type, or a memref 
-        type without shape information e.g., memref<?x?xf16>, the shape information has 
-        to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape" 
-        only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]).
-    * strides: the strides of the memory region pointed by the "source". Similar to shape, 
-        it is typically encoded via the MemRefType of the source too. But if "source" is 
-        simply a pointer represented as uint64_t type, or a memref type without shape 
-        information e.g., memref<?x?xf16>, the strides information has to be explicitly 
-        passed via the "dynamic_strides" argument. And it currently only accepts operands two.
-
-    Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
-    %0 = memref.alloc() : memref<1024x1024xf32>
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %1 = xegpu.create_nd_tdesc %0[%c0, %c0]: memref<1024x1024xf32> -> TensorDesc<8x16xf32>
-
-    Example 2 (suppose the tensor shape inferred by the compiler is 8x16):
-    %0 = memref.alloc(%h, %w) : memref<?x?xf32>
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: memref<?x?xf32> -> TensorDesc<8x16xf32>
-
-    Example 3 (suppose the tensor shape inferred by the compiler is 8x16):
-    %0 = ... : ui64
-    %c0 = arith.constant 0 : index
-    %c1 = arith.constant 1 : index
-    %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
-  }];
-
-  let arguments = (ins 
-    XeGPU_BaseAddrType: $source, 
-    Variadic<Index>: $offsets, 
-    Variadic<Index>: $shape, 
-    Variadic<Index>: $strides,
-    DenseI64ArrayAttr: $static_offsets
-  );
-  let results = (outs XeGPU_TensorDesc: $TensorDesc);
-
-  let assemblyFormat = [{
-    $source ``
-    custom<DynamicIndexList>($offsets, $static_offsets)
-    (`,` `[` $shape^ `]` `,` `[` $strides `]`)?
-    attr-dict `:` type($source) `->` qualified(type($TensorDesc))
-  }];
-
-  let hasVerifier = 1;
-
-  let builders = [
-    OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source, 
-                   "llvm::ArrayRef<OpFoldResult>": $offsets)>,
-
-    OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source, 
-                   "llvm::ArrayRef<OpFoldResult>": $offsets,
-                   "ValueRange": $shape, "ValueRange": $stride)>
-  ];
-
-  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    /// Returns the type of the source memref operand.
-    Type getSourceType() {
-      return getSource().getType();
-    }
-
-    /// Returns the type of the result TensorDesc.
-    xegpu::TensorDescType getType() {
-      return getTensorDesc().getType();
-    }
-
-    /// Return the element type of the TensorDesc
-    Type getElementType() {
-      return getType().getElementType();
-    }
-
-    /// Return the shape of the TensorDesc
-    llvm::ArrayRef<int64_t> getTensorDescShape() {
-      return getType().getShape();
-    }
-
-    /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    OperandRange getSizes() {
-      return getShape();
-    }
-
-    /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    /// If source is IntegerType and `shape` is filled, it will 
-    /// return an array of ShapedType::kDynamic representing dynamic 
-    /// shape encoded in the `shape` argument will be used. Presence
-    /// of `shape` overides static shape from source memref type.
-    SmallVector<int64_t> getStaticSizes() {
-      if (getSourceType().isa<IntegerType>() || getShape().size()) {
-        auto dims = getMixedOffsets().size();
-        return SmallVector<int64_t>(dims, ShapedType::kDynamic);
-      }
-      auto memrefType = getSourceType().dyn_cast<MemRefType>();
-      return SmallVector<int64_t>(memrefType.getShape());
-    }
-
-    /// wrapper for matching with OffsetSizeAndStrideOpInterface
-    /// If source is IntegerType or `strides` is filled, it will 
-    /// return an array of ShapedType::kDynamic representing dynamic 
-    /// strides encoded in the `strides` argument will be used. Presence
-    /// of `strides` overides static strides from source memref type.
-    SmallVector<int64_t> getStaticStrides() {
-      if (getSourceType().isa<IntegerType>() || getStrides().size()) {
-        auto dims = getMixedOffsets().size();
-        return SmallVector<int64_t>(dims, ShapedType::kDynamic);
-      }
-      auto memrefType = getSourceType().dyn_cast<MemRefType>();
-      auto [strides, offset] = getStridesAndOffset(memrefType);
-      return strides;
-    }
-
-    /// Return the expected rank of each of the`static_offsets`, 
-    /// `static_shape` and `static_strides` attributes.
-    std::array<unsigned, 3> getArrayAttrMaxRanks() {
-      unsigned rank;
-      if (auto ty = getSourceType().dyn_cast<MemRefType>()) {
-        rank = ty.getRank();
-      } else {
-        rank = (unsigned)getMixedOffsets().size();
-      }
-      return {rank, rank, rank};
-    }
-    
-    /// Return the number of leading operands before the `offsets`, 
-    /// `shape` and `strides` operands.
-    static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
-
-    mlir::Value getViewSource() { return getSource(); }
-  }];
-}
-
-def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
-  let summary = "prefetches a nD block to cache";
-  let description = [{
-    It issues an instruction to prefetch the data from memory to each 
-    level of the cache based on their cache policy.
-
-    Example:
-    ```
-      xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint<cached>, 
-                                l2_hint = #xegpu.cache_hint<cached>, 
-                                l3_hint = #xegpu.cache_hint<cached>}
-        : !xegpu.tensor_desc<8x16xf16>
-    ```
-
-  }];
-
-  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-                       
-  let extraClassDeclaration = extraBaseClassDeclaration;
-
-  let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))";
-}
-
-
-def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> {
-  let summary = "loads a n-D block from memory (represented by TensorDesc)" 
-                "to registers (represented by vector)";
-  let description = [{
-    LoadNdOp essentially mimics the hardware block read instruction to read 
-    a block of data from memory to register. It takes a set of optional cache 
-    hints for each level of cache, L1, L2 and L3. If hardware does not have a 
-    correspoding cache, Corresponding cache hint attribute will be masked.
-    vnni transform is an hardware feature for Intel GPU, which is used to 
-    do data packing during the load for B operand of matrix operation, if 
-    the bit width of the data type is less then 32 bits, e.g., fp16. And 
-    transpose is another Intel hardware feature, which will do transpose
-    operation when loading the data if the bit width of the data type is 
-    fp32 or fp64. It implies that vnni and transpose cannot exit at the 
-    same time.
-
-    Example:
-    ```
-      xegpu.load_nd %1 {transpose = [1, 0],
-                        l1_hint = #xegpu.cache_hint<cached>, 
-                        l2_hint = #xegpu.cache_hint<uncached>, 
-                        l3_hint = #xegpu.cache_hint<streaming>}
-              : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
-    ```
-
-
-  }];
-
-  let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
-                       OptionalAttr<I64Attr>: $vnni_axis,
-                       OptionalAttr<DenseI64ArrayAttr>: $transpose,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
-  let results = (outs XeGPU_ValueType: $value);
-
-  let extraClassDeclaration = extraBaseClassDeclaration # [{
-    VectorType getType() {
-      return llvm::dyn_cast<VectorType>(getValue().getType());
-    }
-
-    xegpu::TensorDescType getTensorDescType() {
-      return getTensorDesc().getType();
-    }
-  }];
-
-  let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
-  let hasVerifier = 1;
-}
-
-def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> {
-  let summary = "stores a n-D block register region back to memory, currently only supports 2D";
-
-  let description = [{
-    StoreNdOp essentially mimics the hardware block write instruction io
-    write a block of data from register into the memory region as described 
-    by the TensorDesc. It takes a set of optional cache hints for each level 
-    of cache, L1, L2 and L3. If hardware does not have a correspoding cache, 
-    Corresponding cache hint attribute will be masked.
-
-    Example:
-    ```
-      xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
-                             l2_hint = #xegpu.cache_hint<write_back>, 
-                             l3_hint = #xegpu.cache_hint<write_through>}
-                             : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
-    ```
-
-
-  }];
-
-  let arguments = (ins XeGPU_ValueType: $value,
-                       XeGPU_TensorDesc: $TensorDesc,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
-                       OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
-  let extraClassDeclaration = extraBaseClassDeclaration;
+          Op<XeGPU_Dialect, mnemonic, traits>;
 
-  let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict 
-                        `:` type($value) `,` qualified(type($TensorDesc))}];
-  let hasVerifier = 1;
-}
 
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD

diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 19ac1693712dd8..1d75bb4e2906fe 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -9,9 +9,9 @@
 #ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
 #define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
 
+include "mlir/IR/BuiltinTypes.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
 include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
-include "mlir/IR/BuiltinTypes.td"
 
 def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>;
 def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>;
@@ -30,106 +30,4 @@ class XeGPUTypeDef<string name, string typeMnemonic, list<Trait> traits = [],
   let mnemonic = typeMnemonic;
 }
 
-def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
-        [ShapedTypeInterface], "::mlir::TensorType"> {
-  let summary = "TensorDesc describing regions of interested data.";
-  let description = [{
-    TensorDesc is a type designed to describe regions of the interested data as well as some 
-    features that are unique to Intel hardware. Different with the builtin tensor type in MLIR, 
-    it essentially only contains the meta data, and doesn't hold the data by itself. It is designed 
-    to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU. 
-    It encodes the following information:
-
-    * shape:  the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows
-              and each row contains 16 contiguous data element. The rows could be
-              either contiguous or not, depends on whether the encoding attribute
-              is set or not.
-    * element_type: the data type of the data element, e.g., f16, f32.
-
-    Similar to the builtin tensor, it also provides an optinal attribute to encoding 
-    the following information via the TensorDescAttr object:
-    * memory_scope (xegpu::MemoryScope): [optional] where the data is located, 
-                global memory or shared memory. It is default to Global.
-    * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
-               that will be loaded by block load at a time. It is default to 1.
-    * boundary_check (bool): [optional] indicates whether the operation detects the boundary 
-                and pads with zero for out-of-boundary access. It is default to do boundary check.
-    
-
-    Syntax:
-
-    ```
-    TensorDesc-type ::= `tensor_desc` `<` dim-list element-type (attr-list)? `>`
-    element-type ::= float-type | integer-type | index-type
-    dim-list := (static-dim-list `x`)?
-    static-dim-list ::= decimal-literal `x` decimal-literal
-    attr-list = (, memory_scope = value)? (, arr_len = value)? (, boundary_check = value)?
-    ```
-
-    Examples:
-
-    ```mlir
-    // A block TensorDesc with 8x16 i32 elements
-    xegpu.tensor_desc<8x16xi32>
-
-    // A block TensorDesc with 8x16 f32 elements
-    xegpu.tensor_desc<8x16xf32>
-
-    // A TensorDesc with 8x16 f32 elements for a memory region in shared memory space.
-    xegpu.tensor_desc<8x16xf32, #xegpu.tdesc_attr<memory_scope = slm>>
-    ```
-  }];
-
-  let parameters = (ins ArrayRefParameter<"int64_t">: $shape,
-                        "mlir::Type": $elementType,
-                        OptionalParameter<"mlir::Attribute">: $encoding);
-
-  let extraClassDeclaration = [{
-    using TensorType::clone;
-    using mlir::ShapedType::Trait<TensorDescType>::getElementTypeBitWidth;
-    using mlir::ShapedType::Trait<TensorDescType>::getRank;
-    using mlir::ShapedType::Trait<TensorDescType>::getNumElements;
-    using mlir::ShapedType::Trait<TensorDescType>::isDynamicDim;
-    using mlir::ShapedType::Trait<TensorDescType>::hasStaticShape;
-    using mlir::ShapedType::Trait<TensorDescType>::getNumDynamicDims;
-    using mlir::ShapedType::Trait<TensorDescType>::getDimSize;
-    using mlir::ShapedType::Trait<TensorDescType>::getDynamicDimIndex;
-
-    TensorDescType clone(::mlir::Type elementType) {
-      return llvm::cast<TensorDescType>(cloneWith(getShape(), elementType));
-    }
-
-    TensorDescAttr getEncodingAsTensorDescAttr() const {
-      return llvm::dyn_cast_if_present<TensorDescAttr>(getEncoding());
-    }
-
-    xegpu::MemoryScope getMemoryScope() const {
-      auto attr = getEncodingAsTensorDescAttr();
-      if (attr && attr.getMemoryScope())
-        return attr.getMemoryScope().getValue();
-      // return default value
-      return MemoryScope::Global;
-    }
-
-    int getArrayLength() {
-      auto attr = getEncodingAsTensorDescAttr();
-      if (attr && attr.getArrayLength())
-        return attr.getArrayLength().getInt();
-      // return default value
-      return 1; 
-    }
-
-    bool getBoundaryCheck() {
-      auto attr = getEncodingAsTensorDescAttr();
-      if (attr && attr.getBoundaryCheck())
-        return attr.getBoundaryCheck().getValue();
-      // return default value
-      return true;
-    }
-  }];
-
-  let hasCustomAssemblyFormat = true;
-  
-}
-
 #endif // MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD

diff  --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 0b3f4b9c9dbeae..4f839ee773476b 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -6,10 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/DialectImplementation.h"
-#include "llvm/ADT/TypeSwitch.h"
+#include <mlir/Dialect/XeGPU/IR/XeGPU.h>
 
 namespace mlir {
 namespace xegpu {
@@ -29,72 +26,8 @@ void XeGPUDialect::initialize() {
       >();
 }
 
-//===----------------------------------------------------------------------===//
-// XeGPU_TensorDescAttr
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XeGPU_TensorDescType
-//===----------------------------------------------------------------------===//
-mlir::Type TensorDescType::parse(::mlir::AsmParser &parser) {
-  llvm::SmallVector<int64_t> shape;
-  mlir::Type elementType;
-  mlir::FailureOr<mlir::Attribute> encoding;
-
-  // Parse literal '<'
-  if (parser.parseLess())
-    return {};
-
-  auto shapeLoc = parser.getCurrentLocation();
-  if (mlir::failed(parser.parseDimensionList(shape))) {
-    parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
-    return {};
-  }
-
-  auto elemTypeLoc = parser.getCurrentLocation();
-  if (mlir::failed(parser.parseType(elementType))) {
-    parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
-    return {};
-  }
-
-  // parse optional attributes
-  if (mlir::succeeded(parser.parseOptionalComma())) {
-    encoding = mlir::FieldParser<mlir::Attribute>::parse(parser);
-    if (mlir::failed(encoding)) {
-      parser.emitError(
-          parser.getCurrentLocation(),
-          "Failed to parse the attribute field for TensorDescType.\n");
-      return {};
-    }
-  }
-
-  // Parse literal '>'
-  if (parser.parseGreater())
-    return {};
-
-  return TensorDescType::get(parser.getContext(), shape, elementType,
-                             encoding.value_or(mlir::Attribute()));
-}
-
-void TensorDescType::print(::mlir::AsmPrinter &printer) const {
-  printer << "<";
-
-  auto shape = getShape();
-  for (int64_t dim : shape) {
-    if (mlir::ShapedType::isDynamic(dim))
-      printer << '?';
-    else
-      printer << dim;
-    printer << 'x';
-  }
-
-  printer << getElementType();
-
-  if (auto encoding = getEncoding())
-    printer << ", " << encoding;
-
-  printer << ">";
-}
+// this file is for position occupation,
+// we will add functions in following PRs.
 
 } // namespace xegpu
 } // namespace mlir

diff  --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index e6ecf26c224818..b356c397fb8369 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -6,186 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/Utils/StaticValueUtils.h"
-#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
-#include "mlir/IR/Builders.h"
+#include <mlir/Dialect/XeGPU/IR/XeGPU.h>
 
 #define DEBUG_TYPE "xegpu"
 
 namespace mlir {
 namespace xegpu {
 
-static void transpose(llvm::ArrayRef<int64_t> trans,
-                      std::vector<int64_t> &shape) {
-  std::vector<int64_t> old = shape;
-  for (size_t i = 0; i < trans.size(); i++)
-    shape[i] = old[trans[i]];
-}
-
-template <typename T>
-static std::string makeString(T array, bool breakline = false) {
-  std::string buf;
-  buf.clear();
-  llvm::raw_string_ostream os(buf);
-  os << "[";
-  for (size_t i = 1; i < array.size(); i++) {
-    os << array[i - 1] << ", ";
-    if (breakline)
-      os << "\n\t\t";
-  }
-  os << array.back() << "]";
-  os.flush();
-  return buf;
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU_CreateNdDescOp
-//===----------------------------------------------------------------------===//
-void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
-                           Type tdesc, TypedValue<MemRefType> source,
-                           llvm::ArrayRef<OpFoldResult> offsets) {
-  auto ty = source.getType();
-  (void)ty;
-  assert(ty && ty.hasStaticShape() && offsets.size() == (size_t)ty.getRank());
-
-  llvm::SmallVector<int64_t> staticOffsets;
-  llvm::SmallVector<Value> dynamicOffsets;
-  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
-
-  build(builder, state, tdesc, source, dynamicOffsets /* dynamic offsets */,
-        ValueRange({}) /* empty dynamic shape */,
-        ValueRange({}) /* empty dynamic strides */,
-        staticOffsets /* static offsets */);
-}
-
-void CreateNdDescOp::build(OpBuilder &builder, OperationState &state,
-                           Type tdesc, TypedValue<IntegerType> source,
-                           llvm::ArrayRef<OpFoldResult> offsets,
-                           ValueRange shape, ValueRange stride) {
-  assert(shape.size() && offsets.size() && stride.size() &&
-         shape.size() == stride.size() && shape.size() == offsets.size());
-
-  llvm::SmallVector<int64_t> staticOffsets;
-  llvm::SmallVector<Value> dynamicOffsets;
-
-  dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
-
-  build(builder, state, tdesc, source, /* dynamic_offsets = */ dynamicOffsets,
-        /* dynamic shape = */ shape, /* dynamic strides = */ stride,
-        /* static offsets = */ staticOffsets);
-}
-
-LogicalResult CreateNdDescOp::verify() {
-  auto rank = (int64_t)getMixedOffsets().size();
-  bool invalidRank = (rank != 2);
-  bool invalidElemTy = false;
-
-  // check source type matches the rank if it is a memref.
-  // It also should have the same ElementType as TensorDesc.
-  auto memrefTy = getSourceType().dyn_cast<MemRefType>();
-  if (memrefTy) {
-    invalidRank |= (memrefTy.getRank() != rank);
-    invalidElemTy |= memrefTy.getElementType() != getElementType();
-  }
-
-  // check result type matches the rank
-  invalidRank = (getType().getRank() != rank);
-
-  // mismatches among shape, strides, and offsets are
-  // already handeled by OffsetSizeAndStrideOpInterface.
-  // So they are not check here.
-  if (invalidRank)
-    return emitOpError(
-        "Expecting the rank of shape, strides, offsets, "
-        "source memref type (if source is a memref) and TensorDesc "
-        "should match with each other. They currenlty are 2D.");
-
-  if (invalidElemTy)
-    return emitOpError("TensorDesc should have the same element "
-                       "type with the source if it is a memref.\n");
-
-  return success();
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU_LoadNdOp
-//===----------------------------------------------------------------------===//
-LogicalResult LoadNdOp::verify() {
-  auto tdescTy = getTensorDescType();
-  auto valueTy = getType();
-
-  if (tdescTy.getRank() != 2)
-    return emitOpError(
-        "The TensorDesc for LoadNdOp should be a 2D TensorDesc.");
-
-  if (!valueTy)
-    return emitOpError("Invalid result, it should be a VectorType.\n");
-
-  auto tdescElemTy = tdescTy.getElementType();
-  auto valueElemTy = valueTy.getElementType();
-
-  if (tdescElemTy != valueElemTy)
-    return emitOpError(
-        "Value should have the same element type as TensorDesc.");
-
-  auto array_len = tdescTy.getArrayLength();
-  auto tdescShape = tdescTy.getShape().vec();
-  auto valueShape = valueTy.getShape().vec();
-
-  if (getTranspose()) {
-    auto trans = getTranspose().value();
-    if (tdescShape.size() >= trans.size())
-      transpose(trans, tdescShape);
-    else
-      emitWarning("Invalid transpose attr. It is ignored.");
-  }
-
-  if (getVnniAxis()) {
-    auto axis = getVnniAxis().value();
-    auto vnni_factor = valueShape.back();
-    tdescShape[axis] /= vnni_factor;
-    tdescShape.push_back(vnni_factor);
-  }
-
-  if (array_len > 1) {
-    auto it = tdescShape.begin();
-    tdescShape.insert(it, array_len);
-  }
-
-  if (tdescShape != valueShape)
-    return emitOpError() << "Result shape doesn't match TensorDesc shape."
-                         << "The expected shape is " << makeString(tdescShape)
-                         << ". But the given shape is "
-                         << makeString(valueShape) << ".\n";
-  return success();
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU_StoreNdOp
-//===----------------------------------------------------------------------===//
-LogicalResult StoreNdOp::verify() {
-  auto dstTy = getTensorDesc().getType();               // Tile
-  auto valTy = getValue().getType().cast<VectorType>(); // Vector
-
-  if (dstTy.getRank() != 2)
-    return emitOpError("Expecting a 2D TensorDesc shape.\n");
-
-  if (!valTy)
-    return emitOpError("Exepcting a VectorType result.\n");
-
-  auto dstElemTy = dstTy.getElementType();
-  auto valElemTy = valTy.getElementType();
-
-  if (dstElemTy != valElemTy) {
-    return emitOpError() << "The element type of the value should "
-                            "match the elementtype of the TensorDesc.\n";
-  }
-
-  if (dstTy.getShape() != valTy.getShape())
-    return emitOpError()
-           << "The result shape should match the TensorDesc shape.\n";
-  return success();
-}
+// this file is for position occupation,
+// we will add functions in following PRs.
 
 } // namespace xegpu
 } // namespace mlir

diff  --git a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir b/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
deleted file mode 100644
index 039346adbb851c..00000000000000
--- a/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
+++ /dev/null
@@ -1,62 +0,0 @@
-// RUN: mlir-opt %s | FileCheck %s
-// Verify the printed output can be parsed.
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
-// Verify the generic form can be parsed.
-// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s
-
-// CHECK-LABEL: gpu.module @test {
-gpu.module @test {
-// CHECK: gpu.func @test_create_nd_tdesc_vc_1(%[[arg0:.*]]: memref<24x32xf32>) {
-gpu.func @test_create_nd_tdesc_vc_1(%src: memref<24x32xf32>) {
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
-  gpu.return
-}
-
-// CHECK: gpu.func @test_create_nd_tdesc_vc_2(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
-gpu.func @test_create_nd_tdesc_vc_2(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
-  //CHECK: %[[C:.*]] = arith.constant 1 : index
-  %c1 = arith.constant 1 : index
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][%[[arg3]], %[[arg4]]], [%[[arg2]], %[[arg1]]], [%[[arg1]], %[[C]]] : ui64 -> !xegpu.tensor_desc<8x16xf32>
-  %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf32>
-  gpu.return
-}
-
-// CHECK: gpu.func @test_create_nd_tdesc_vc_3(%[[arg0:.*]]: memref<24x32xf32>) {
-gpu.func @test_create_nd_tdesc_vc_3(%src: memref<24x32xf32>) {
-  // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr<array_length = 2 : i64>
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<24x16xf32, #xegpu.tdesc_attr<array_length = 2>>
-  gpu.return
-}
-
-// CHECK: gpu.func @test_prefetch_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @test_prefetch_nd_vc(%src: memref<24x32xf16>) {
-  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
-  // CHECK: xegpu.prefetch_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16>
-  xegpu.prefetch_nd %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>: !xegpu.tensor_desc<8x16xf16>
-  gpu.return
-}
-
-// CHECK: func @test_load_nd_vc(%[[arg0:.*]]: memref<8x16xf16>) {
-gpu.func @test_load_nd_vc(%src: memref<8x16xf16>) {
-  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
-  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, vnni_axis = 0 : i64}> : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16>
-  %2 = xegpu.load_nd %1 <{vnni_axis = 0, l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
-       : !xegpu.tensor_desc<8x16xf16> -> vector<4x16x2xf16>
-  gpu.return
-}
-
-// CHECK: func @test_store_nd_vc(%[[arg0:.*]]: memref<24x32xf16>) {
-gpu.func @test_store_nd_vc(%dst: memref<24x32xf16>) {
-  // CHECK: %[[C:.*]] = arith.constant dense<1.000000e+00> : vector<24x32xf16>
-  %1 = arith.constant dense<1.0>: vector<24x32xf16>
-  // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
-  %2 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf16> -> !xegpu.tensor_desc<24x32xf16>
-  // CHECK: xegpu.store_nd %[[C]], %[[R0]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16>
-  xegpu.store_nd %1, %2 <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}>: vector<24x32xf16>, !xegpu.tensor_desc<24x32xf16>
-  gpu.return
-}
-
-}
\ No newline at end of file