[Mlir-commits] [mlir] [MLIR] Add XeGPU dialect for Intel GPU (PR #78483)

Wed Jan 17 09:58:51 PST 2024

llvmbot wrote:



@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: None (chencha3)

<details>
<summary>Changes</summary>

This PR follows our previous [RFC](https://discourse.llvm.org/t/rfc-add-xegpu-dialect-for-intel-gpus/75723) to add the XeGPU dialect definition for Intel GPUs. It contains the dialect, type, attribute, and operator definitions, as well as test cases for semantic checks. The lowering and optimization passes will be submitted in separate patches.

---

Patch is 194.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78483.diff


35 Files Affected:

- (modified) mlir/include/mlir/Dialect/CMakeLists.txt (+1) 
- (added) mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt (+1) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt (+14) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h (+52) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td (+14) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td (+150) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td (+46) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td (+505) 
- (added) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td (+170) 
- (modified) mlir/include/mlir/InitAllDialects.h (+3-1) 
- (modified) mlir/lib/Dialect/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/XeGPU/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/XeGPU/IR/CMakeLists.txt (+15) 
- (added) mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp (+385) 
- (added) mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp (+1929) 
- (added) mlir/test/Dialect/XeGPU/IR/XeGPUOps.mlir (+110) 
- (added) mlir/test/Dialect/XeGPU/IR/atomic_rmw.mlir (+43) 
- (added) mlir/test/Dialect/XeGPU/IR/atomic_rmw_vc.mlir (+38) 
- (added) mlir/test/Dialect/XeGPU/IR/barrier_ops.mlir (+54) 
- (added) mlir/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir (+111) 
- (added) mlir/test/Dialect/XeGPU/IR/create_nd_tdesc_vc.mlir (+115) 
- (added) mlir/test/Dialect/XeGPU/IR/create_tdesc.mlir (+11) 
- (added) mlir/test/Dialect/XeGPU/IR/create_tdesc_vc.mlir (+51) 
- (added) mlir/test/Dialect/XeGPU/IR/invalid_vc.mlir (+70) 
- (added) mlir/test/Dialect/XeGPU/IR/load_gather_vc.mlir (+50) 
- (added) mlir/test/Dialect/XeGPU/IR/load_nd.mlir (+164) 
- (added) mlir/test/Dialect/XeGPU/IR/load_nd_vc.mlir (+69) 
- (added) mlir/test/Dialect/XeGPU/IR/prefetch_nd_vc.mlir (+62) 
- (added) mlir/test/Dialect/XeGPU/IR/simple_gemm.mlir (+71) 
- (added) mlir/test/Dialect/XeGPU/IR/simple_gemm_vc.mlir (+65) 
- (added) mlir/test/Dialect/XeGPU/IR/store_nd_vc.mlir (+83) 
- (added) mlir/test/Dialect/XeGPU/IR/store_scatter.mlir (+29) 
- (added) mlir/test/Dialect/XeGPU/IR/store_scatter_vc.mlir (+29) 
- (added) mlir/test/Dialect/XeGPU/IR/update_nd_offset.mlir (+27) 
- (added) mlir/test/Dialect/XeGPU/IR/update_offset_vc.mlir (+29) 


``````````diff

diff --git a/mlir/include/mlir/Dialect/CMakeLists.txt b/mlir/include/mlir/Dialect/CMakeLists.txt
index 1c4569ecfa5848..e0eb421291ded7 100644
--- a/mlir/include/mlir/Dialect/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/CMakeLists.txt
@@ -39,3 +39,4 @@ add_subdirectory(UB)
 add_subdirectory(Utils)
 add_subdirectory(Vector)
 add_subdirectory(X86Vector)
+add_subdirectory(XeGPU)
diff --git a/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
new file mode 100644
index 00000000000000..f33061b2d87cff
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(IR)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
new file mode 100644
index 00000000000000..f1740e9ed929a6
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/CMakeLists.txt
@@ -0,0 +1,14 @@
+add_mlir_dialect(XeGPU xegpu)
+add_mlir_doc(XeGPU XeGPU Dialects/ -gen-dialect-doc -dialect=xegpu)
+
+set(LLVM_TARGET_DEFINITIONS XeGPU.td)
+mlir_tablegen(XeGPUAttrs.h.inc -gen-attrdef-decls)
+mlir_tablegen(XeGPUAttrs.cpp.inc -gen-attrdef-defs)
+add_public_tablegen_target(MLIRXeGPUAttrsIncGen)
+add_dependencies(mlir-headers MLIRXeGPUAttrsIncGen)
+
+set(LLVM_TARGET_DEFINITIONS XeGPU.td)
+mlir_tablegen(XeGPUEnums.h.inc -gen-enum-decls)
+mlir_tablegen(XeGPUEnums.cpp.inc -gen-enum-defs)
+add_public_tablegen_target(MLIRXeGPUEnumsIncGen)
+add_dependencies(mlir-headers MLIRXeGPUEnumsIncGen)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
new file mode 100644
index 00000000000000..a05e046a0e0c0b
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -0,0 +1,52 @@
+//===- XeGPU.h - MLIR dialect for XeGPU -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
+#define MLIR_DIALECT_XEGPU_IR_XEGPU_H
+
+#include <mlir/IR/BuiltinTypes.h>
+#include <mlir/IR/Dialect.h>
+#include <mlir/IR/OpDefinition.h>
+#include <mlir/IR/Region.h>
+#include <mlir/IR/Types.h>
+#include <mlir/Interfaces/CastInterfaces.h>
+#include <mlir/Interfaces/ControlFlowInterfaces.h>
+#include <mlir/Interfaces/CopyOpInterface.h>
+#include <mlir/Interfaces/InferTypeOpInterface.h>
+#include <mlir/Interfaces/ShapedOpInterfaces.h>
+#include <mlir/Interfaces/SideEffectInterfaces.h>
+#include <mlir/Interfaces/ViewLikeInterface.h>
+
+namespace mlir {
+
+/// Return the list of Range (i.e. offset, size, stride). Each Range
+/// entry contains either the dynamic value or a ConstantIndexOp constructed
+/// with `b` at location `loc`.
+SmallVector<Range, 8> getOrCreateRanges(OffsetSizeAndStrideOpInterface op,
+                                        OpBuilder &b, Location loc);
+
+} // namespace mlir
+
+namespace mlir {
+namespace xegpu {
+
+class TensorDescType;
+
+} // namespace xegpu
+} // namespace mlir
+
+#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.h.inc>
+#include <mlir/Dialect/XeGPU/IR/XeGPUEnums.h.inc>
+#define GET_ATTRDEF_CLASSES
+#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.h.inc>
+#define GET_TYPEDEF_CLASSES
+#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.h.inc>
+#define GET_OP_CLASSES
+#include <mlir/Dialect/XeGPU/IR/XeGPU.h.inc>
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_H
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td
new file mode 100644
index 00000000000000..232e962870716c
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.td
@@ -0,0 +1,14 @@
+//===- XeGPU.td - XeGPU dialect definition ------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPU_TD
+
+include "mlir/Dialect/XeGPU/IR/XeGPUOps.td"
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPU_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
new file mode 100644
index 00000000000000..ed3d9bbc772567
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -0,0 +1,150 @@
+//===- XeGPUAttrs.td - XeGPU dialect attributes definition --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
+
+include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
+include "mlir/IR/EnumAttr.td"
+
+class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
+                string baseCppClass = "::mlir::Attribute">
+    : AttrDef<XeGPU_Dialect, name, traits, baseCppClass> {
+  let mnemonic = attrMnemonic;
+}
+
+def XeGPU_ScatteredAttr : XeGPUAttr<"Scattered", "scattered"> {
+  let summary = "Scattered attribute for scattered read and write operation.";
+  let description = [{An attribute represent scattered read and write operation.
+    It does not (need to) have meaningful input values. The existence of itself
+    implies scattered read/write.}];
+
+  let assemblyFormat = "";
+}
+
+def XeGPU_SgMapAttr: XeGPUAttr<"SubGroupMap", "sg_map"> {
+  let parameters = (ins
+        "mlir::DenseI32ArrayAttr":$wi_layout,
+        "mlir::DenseI32ArrayAttr":$wi_data
+      );
+
+  // In format of #xegpu.sg_map<{mma_block_size = [2, 4], wi_layout = [2, 4], wi_data = [2, 4]}>
+  let assemblyFormat = "`<` struct(params) `>`";
+
+  let genVerifyDecl = true;
+
+  let builders = [
+    AttrBuilder<(ins
+      "llvm::ArrayRef<int32_t>":$wiLayout,
+      "llvm::ArrayRef<int32_t>":$wiData
+    )>
+  ];
+}
+
+def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
+  let parameters = (ins
+        DefaultValuedParameter<"xegpu::MemoryScopeKind", "xegpu::MemoryScopeKind::GLOBAL">: $memory_scope,
+        DefaultValuedParameter<"int", "1">: $array_length,
+        DefaultValuedParameter<"bool", "true">: $boundary_check,
+        OptionalParameter<"xegpu::ScatteredAttr">: $scattered,
+        OptionalParameter<"xegpu::SubGroupMapAttr"> : $map
+      );
+
+  let builders = [
+    AttrBuilder<(ins
+      CArg<"xegpu::MemoryScopeKind", "xegpu::MemoryScopeKind::GLOBAL">:$memory_scope,
+      CArg<"int", "1">:$array_length,
+      CArg<"xegpu::ScatteredAttr", "{}">:$scattered,
+      CArg<"xegpu::SubGroupMapAttr", "{}">:$map
+    )>
+  ];
+
+  let extraClassDeclaration = [{
+    bool hasNonDefaultAttrs();
+  }];
+
+  let hasCustomAssemblyFormat = true;
+}
+
+def ARG_TYPE_VECTOR : I32EnumAttrCase<"VECTOR", 0, "vector">; 
+def ARG_TYPE_SCALAR : I32EnumAttrCase<"SCALAR", 1, "scalar">; 
+def XeGPU_ArgTypeKind : I32EnumAttr<"ArgTypeKind", 
+               "Argument type for Invoke_SIMD op", 
+  [ARG_TYPE_VECTOR, ARG_TYPE_SCALAR]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+
+def MODE_SIMT : I32EnumAttrCase<"SIMT", 0, "simt">;
+def MODE_VC : I32EnumAttrCase<"VC", 1, "vc">;
+def XeGPU_ModeKind : I32EnumAttr<"ModeKind", 
+             "The Mode an operator runs on", 
+  [MODE_SIMT, MODE_VC]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+
+def MEMORY_SCOPE_GLOBAL: I32EnumAttrCase<"GLOBAL", 0, "global">;
+def MEMORY_SCOPE_SHARED: I32EnumAttrCase<"SLM", 1, "slm">;
+def XeGPU_MemoryScopeKind: I32EnumAttr<"MemoryScopeKind", 
+      "The scope of the memory the tensor descritor is created for", 
+  [MEMORY_SCOPE_GLOBAL, MEMORY_SCOPE_SHARED]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+
+def CACHE_KIND_CACHED:        I32EnumAttrCase<"CACHED", 0, "cached">;                    // valid for read and write
+def CACHE_KIND_UNCACHED:      I32EnumAttrCase<"UNCACHED", 1, "uncached">;                // valid for read and write
+def CACHE_KIND_STREAMING:     I32EnumAttrCase<"STREAMING", 2, "streaming">;              // valid for read only
+def CACHE_KIND_INVALIDATE:    I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">;  // valid for read only
+def CACHE_KIND_WRITE_BACK:    I32EnumAttrCase<"WRITE_BACK", 4, "write_back">;            // valid for write only
+def CACHE_KIND_WRITE_THROUGH: I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">;      // valid for write only
+
+
+
+def XeGPU_CacheKind : I32EnumAttr<"CacheKind", "Cache kind", 
+  [CACHE_KIND_CACHED, CACHE_KIND_UNCACHED, 
+   CACHE_KIND_STREAMING, CACHE_KIND_INVALIDATE,
+   CACHE_KIND_WRITE_BACK, CACHE_KIND_WRITE_THROUGH]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+
+def XeGPU_ArgTypeAttr : EnumAttr<XeGPU_Dialect, XeGPU_ArgTypeKind, "arg_type_kind">;
+def XeGPU_ModeAttr : EnumAttr<XeGPU_Dialect, XeGPU_ModeKind, "mode_kind">;
+def XeGPU_MemoryScopeAttr : EnumAttr<XeGPU_Dialect, XeGPU_MemoryScopeKind, "memory_scope_kind">;
+def XeGPU_CacheAttr : EnumAttr<XeGPU_Dialect, XeGPU_CacheKind, "cache_kind">;
+
+// RMW kind attribute
+def ATOMIC_RMW_KIND_ADDF    : I32EnumAttrCase<"addf", 0>;
+def ATOMIC_RMW_KIND_ADDI    : I32EnumAttrCase<"addi", 1>;
+def ATOMIC_RMW_KIND_ASSIGN  : I32EnumAttrCase<"assign", 2>;
+def ATOMIC_RMW_KIND_MAXF    : I32EnumAttrCase<"maxf", 3>;
+def ATOMIC_RMW_KIND_MAXS    : I32EnumAttrCase<"maxs", 4>;
+def ATOMIC_RMW_KIND_MAXU    : I32EnumAttrCase<"maxu", 5>;
+def ATOMIC_RMW_KIND_MINF    : I32EnumAttrCase<"minf", 6>;
+def ATOMIC_RMW_KIND_MINS    : I32EnumAttrCase<"mins", 7>;
+def ATOMIC_RMW_KIND_MINU    : I32EnumAttrCase<"minu", 8>;
+def ATOMIC_RMW_KIND_MULF    : I32EnumAttrCase<"mulf", 9>;
+def ATOMIC_RMW_KIND_MULI    : I32EnumAttrCase<"muli", 10>;
+def ATOMIC_RMW_KIND_ORI     : I32EnumAttrCase<"ori", 11>;
+def ATOMIC_RMW_KIND_ANDI    : I32EnumAttrCase<"andi", 12>;
+
+def XeGPU_AtomicRMWKind : I32EnumAttr<"AtomicRMWKind", 
+                       "Operation type for AtomicRMW",
+  [ATOMIC_RMW_KIND_ADDF, ATOMIC_RMW_KIND_ADDI, ATOMIC_RMW_KIND_ASSIGN,
+   ATOMIC_RMW_KIND_MAXF, ATOMIC_RMW_KIND_MAXS, ATOMIC_RMW_KIND_MAXU,
+   ATOMIC_RMW_KIND_MINF, ATOMIC_RMW_KIND_MINS, ATOMIC_RMW_KIND_MINU,
+   ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI, ATOMIC_RMW_KIND_ORI,
+   ATOMIC_RMW_KIND_ANDI]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::xegpu";
+}
+def XeGPU_AtomicRMWKindAttr : EnumAttr<XeGPU_Dialect, XeGPU_AtomicRMWKind, "atomic_rmw_kind">;
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
new file mode 100644
index 00000000000000..f85ccb32cc43b0
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -0,0 +1,46 @@
+//===- XeGPUDialect.td - XeGPU dialect definition -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
+
+include "mlir/IR/OpBase.td"
+include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/AttrTypeBase.td"
+include "mlir/IR/BuiltinTypes.td"
+include "mlir/IR/BuiltinTypeInterfaces.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/ViewLikeInterface.td"
+include "mlir/Interfaces/CastInterfaces.td"
+include "mlir/Interfaces/ControlFlowInterfaces.td"
+include "mlir/Interfaces/CopyOpInterface.td"
+include "mlir/Interfaces/InferTypeOpInterface.td"
+include "mlir/Interfaces/ShapedOpInterfaces.td"
+
+def XeGPU_Dialect : Dialect {
+    let name = "xegpu";
+    let cppNamespace = "::mlir::xegpu";
+    let summary = "The XeGPU dialect that models Intel GPU's ISA";
+    let description = [{
+      The XeGPU dialect models Intel Xe ISA semantics but works at vector and
+      TensorDesc data type. It provides 1:1 mappings to match Xe instructions 
+      like DPAS and 2D block load. The matrix size being processed at this level
+      exactly matches the hardware instructions or the intrinsic supported by
+      the lower-level GPU compiler.
+    }];
+
+    let dependentDialects = [
+      "arith::ArithDialect",
+      "memref::MemRefDialect"
+    ];
+
+    let useDefaultTypePrinterParser = true;
+    let useDefaultAttributePrinterParser = true;
+}
+
+#endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
new file mode 100644
index 00000000000000..766590f6a3f878
--- /dev/null
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -0,0 +1,505 @@
+//===- XeGPUOps.td - XeGPU dialect operations definition ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
+#define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
+
+include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
+include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
+include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
+
+
+// Base class for dialect operations. This operation inherits from the base
+// `Op` class in OpBase.td, and provides:
+//   * The parent dialect of the operation.
+//   * The mnemonic for the operation, or the name without the dialect prefix.
+//   * A list of traits for the operation.
+class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
+          Op<XeGPU_Dialect, mnemonic, traits>;
+
+def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSegments]> {
+
+  let summary = "create nd tensor descriptor operation";
+  let description = [{
+    The "create_nd_tdesc" operation creates a TensorDescType which represents
+    a sub-view of a 2D memory region (It can be extended to support N-D memory
+    region if needed in future). Elements in the subview continuous in each 
+    dimention. It encodes the following important information for supporting 
+    Intel hardware features:
+
+    * source: an object representing (starting address/pointer of) a 2D memory reagion. 
+        It can be either a 2D memref object, or simply a pointer represented by uint64_t type.
+    * offsets: two index values represents offsets from the "source" at the each dimension 
+        at which the subview of the target memory will be created. It is encoded via two
+        variables, including "dynamic_offsets" and "static_offsets", such that it can
+        accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])).
+    * shape: the shape information of the memory region pointed by the "source".  It is 
+        typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>. 
+        But if "source" is simply a pointer represented as uint64_t type, or a memref 
+        type without shape information e.g., memref<?x?xf16>, the shape information has 
+        to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape" 
+        only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]).
+    * strides: the strides of the memory region pointed by the "source". Similar to shape, 
+        it is typically encoded via the MemRefType of the source too. But if "source" is 
+        simply a pointer represented as uint64_t type, or a memref type without shape 
+        information e.g., memref<?x?xf16>, the strides information has to be explicitly 
+        passed via the "dynamic_strides" argument. And it currently only accepts operands two.
+
+    Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
+    %0 = memref.alloc() : memref<32x24xf32>
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %1 = xegpu.create_nd_tdesc %0[%c0, %c1]: memref<32x24xf32> -> TensorDesc<8x16xf32>
+
+    Example 2 (suppose the tensor shape inferred by the compiler is 8x16):
+    %0 = memref.alloc(%h, %w) : memref<?x?xf32>
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %1 = xegpu.create_nd_tdesc %0[%c0, %c1], [%h, %w], [%w, %c1]: memref<?x?xf32> -> TensorDesc<8x16xf32>
+
+    Example 3 (suppose the tensor shape inferred by the compiler is 8x16):
+    %0 = ... : ui64
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %1 = xegpu.create_nd_tdesc %0[%c0, %c1], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
+  }];
+
+  let arguments = (ins XeGPU_BaseAddrType: $source, 
+                 Variadic<Index>: $dynamic_offsets, 
+                 Variadic<Index>: $dynamic_shape, 
+                 Variadic<Index>: $dynamic_strides,
+                 DenseI64ArrayAttr: $static_offsets,
+                 DefaultValuedAttr<XeGPU_ModeAttr, "xegpu::ModeKind::SIMT">: $mode);
+  let results = (outs XeGPU_TensorDesc:$TensorDesc);
+
+  let hasCustomAssemblyFormat = 1;
+  let skipDefaultBuilders = 1;
+  let hasVerifier = 1;
+
+  let builders = [
+    OpBuilder<(ins "Type": $TensorDesc, "Value": $source, "ValueRange": $offsets, 
+                   "ValueRange": $shape, "ValueRange": $strides, 
+                   "llvm::ArrayRef<int64_t>": $static_offsets,
+                    CArg<"xegpu::ModeKind", "xegpu::ModeKind::SIMT">: $mode)>,
+
+    OpBuilder<(ins "Type": $tdesc, "Value": $source, 
+                   "llvm::ArrayRef<OpFoldResult>": $offsets,
+                   CArg<"xegpu::ModeKind", "xegpu::ModeKind::SIMT">: $mode)>,
+
+    OpBuilder<(ins "Type": $tdesc, "Value": $source, 
+                   "llvm::ArrayRef<OpFoldResult>": $offsets,
+                   "ValueRange": $shape, "ValueRange": $stride,
+                   CArg<"xegpu::ModeKind", "xegpu::ModeKind::SIMT">: $mode)>
+  ];
+
+  let extraClassDeclaration = [{
+    /// Returns the type of the source memref operand.
+    Type getSourceType() {
+      return getSource().getType();
+    }
+
+    /// Returns the type of the result TensorDesc.
+    xegpu::TensorDescType getTensorDescType();
+
+    /// Returns the offsets info to the source. It consolidates
+    /// information from both dynamic_offsets and static_offsets
+    /// parameters. static_offsets parameter always has the expected
+    /// ranks with some dim could have ShapeType::kDynamic value
+    /// indicating the corresponding value should be from dynamic_offsets.
+    llvm::SmallVector<OpFoldResult> getOffsets();
+
+    /// returns the shape info of the source. It is either from the
+    /// memref type, if source is a memref with static shape
+    /// information or from the dynamic_shape parameter. If both
+    /// exists, the dynamic_shape parameter will be used and the
+    /// shape information from  memref type will be ignored.
+    llvm::SmallVector<OpFoldResult> getShape();
+
+    /// returns the strides info of the source. It is either from the
+    /// memref type, if source is a memref with static shape
+    /// information or from the dynamic_stride parameter. If both
+    /// exists, the dynamic_strides parameter will be used and the
+    /// strides information from  memref type will be ignored.
+    llvm::SmallVector<OpFoldResult> getStrides();
+
+    /// return the shape embeded in the memref type of the source.
+    /// If source is not memref type. array of kDynamic will be returned.
+    llvm::ArrayRef<int64_t> getStaticShape();
+
+    /// return the strides embeded in the memref type of the source.
+    /// If source is not memref type. array of kDynamic will be returned.
+    llvm::ArrayRef<int64_t> getStaticStrides();
+
+    /// Return the element type of the TensorDesc
+    Type getElementType();
+
+    /// Return the shape of the TensorDesc
+    llvm::ArrayRef<int64_t> ge...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/78483