[Mlir-commits] [mlir] Revert "[MLIR][XeGPU] Adding XeGPU 2d block operators (#84692)" (PR #85653)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Mar 18 08:48:56 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Balaji V. Iyer. (bviyer)
<details>
<summary>Changes</summary>
This reverts commit daebe5c4f27ba140ac8d13abf41e3fe4db72b91a.
This commit causes the following asan issue:
```
<snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir | <snip>/llvm-project/build/bin/FileCheck <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# executed command: <snip>/llvm-project/build/bin/mlir-opt <snip>/llvm-project/mlir/test/Dialect/XeGPU/XeGPUOps.mlir
# .---command stderr------------
# | =================================================================
# | ==2772558==ERROR: AddressSanitizer: stack-use-after-return on address 0x7fd2c2c42b90 at pc 0x55e406d54614 bp 0x7ffc810e4070 sp 0x7ffc810e4068
# | READ of size 8 at 0x7fd2c2c42b90 thread T0
# | #<!-- -->0 0x55e406d54613 in operator()<long int const*> /usr/include/c++/13/bits/predefined_ops.h:318
# | #<!-- -->1 0x55e406d54613 in __count_if<long int const*, __gnu_cxx::__ops::_Iter_pred<mlir::verifyListOfOperandsOrIntegers(Operation*, llvm::StringRef, unsigned int, llvm::ArrayRef<long int>, ValueRange)::<lambda(int64_t)> > > /usr/include/c++/13/bits/stl_algobase.h:2125
# | #<!-- -->2 0x55e406d54613 in count_if<long int const*, mlir::verifyListOfOperandsOrIntegers(Operation*,
...
```
---
Patch is 36.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85653.diff
8 Files Affected:
- (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h (+1-6)
- (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td (-61)
- (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td (+2-2)
- (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td (+2-289)
- (modified) mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td (+1-103)
- (modified) mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp (+3-70)
- (modified) mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp (+3-174)
- (removed) mlir/test/Dialect/XeGPU/XeGPUOps.mlir (-62)
``````````diff
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
index 87aabdc015fea5..7aaa4ecc7ee77a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPU.h
@@ -9,12 +9,7 @@
#ifndef MLIR_DIALECT_XEGPU_IR_XEGPU_H
#define MLIR_DIALECT_XEGPU_IR_XEGPU_H
-#include "mlir/Bytecode/BytecodeOpInterface.h"
-#include "mlir/IR/BuiltinTypes.h"
-#include "mlir/IR/Dialect.h"
-#include "mlir/Interfaces/ShapedOpInterfaces.h"
-#include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Interfaces/ViewLikeInterface.h"
+#include <mlir/IR/Dialect.h>
namespace mlir {
namespace xegpu {
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index cd38549f1ccf43..bb325c272e3324 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -10,7 +10,6 @@
#define MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
-include "mlir/IR/EnumAttr.td"
class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
string baseCppClass = "::mlir::Attribute">
@@ -18,64 +17,4 @@ class XeGPUAttr<string name, string attrMnemonic, list<Trait> traits = [],
let mnemonic = attrMnemonic;
}
-def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
- let parameters = (ins
- OptionalParameter<"MemoryScopeAttr">: $memory_scope,
- OptionalParameter<"IntegerAttr", "1">: $array_length,
- OptionalParameter<"BoolAttr", "true">: $boundary_check
- );
-
- let builders = [
- AttrBuilder<(ins
- CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
- CArg<"int", "1">:$array_length,
- CArg<"bool", "true">: $boundary_check
- )>
- ];
-
- let assemblyFormat = "`<` struct(params) `>`";
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU Memory Scope Enums.
-//===----------------------------------------------------------------------===//
-def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
-def XeGPU_MemoryScopeShared: I32EnumAttrCase<"SLM", 1, "slm">;
-def XeGPU_MemoryScope: I32EnumAttr<"MemoryScope",
- "The address space of the memory the tensor descritor is created for",
- [XeGPU_MemoryScopeGlobal, XeGPU_MemoryScopeShared]> {
- let genSpecializedAttr = 0;
- let cppNamespace = "::mlir::xegpu";
-}
-
-def XeGPU_MemoryScopeAttr:
- EnumAttr<XeGPU_Dialect, XeGPU_MemoryScope, "memory_scope"> {
- let assemblyFormat = "$value";
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU Cache Enums.
-//===----------------------------------------------------------------------===//
-def XeGPU_CachePolicyCached: I32EnumAttrCase<"CACHED", 0, "cached">; // valid for read and write
-def XeGPU_CachePolicyUncached: I32EnumAttrCase<"UNCACHED", 1, "uncached">; // valid for read and write
-def XeGPU_CachePolicyStreaming: I32EnumAttrCase<"STREAMING", 2, "streaming">; // valid for read only
-def XeGPU_CachePolicyInvalid: I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">; // valid for read only
-def XeGPU_CachePolicyWriteBack: I32EnumAttrCase<"WRITE_BACK", 4, "write_back">; // valid for write only
-def XeGPU_CachePolicyWriteThrough: I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">; // valid for write only
-
-def XeGPU_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy",
- [XeGPU_CachePolicyCached, XeGPU_CachePolicyUncached,
- XeGPU_CachePolicyStreaming, XeGPU_CachePolicyInvalid,
- XeGPU_CachePolicyWriteBack, XeGPU_CachePolicyWriteThrough]> {
- let genSpecializedAttr = 0;
- let cppNamespace = "::mlir::xegpu";
-}
-
-def XeGPU_CacheHintAttr
- : EnumAttr<XeGPU_Dialect, XeGPU_CachePolicyEnums, "cache_hint"> {
- let assemblyFormat = "`<` $value `>`";
-}
-
-
-
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUATTRS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
index c2f09319c790e0..3851275ad30a0a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUDialect.td
@@ -23,8 +23,8 @@ def XeGPU_Dialect : Dialect {
the lower-level GPU compiler.
}];
- let useDefaultTypePrinterParser = true;
- let useDefaultAttributePrinterParser = true;
+ // let useDefaultTypePrinterParser = true;
+ // let useDefaultAttributePrinterParser = true;
}
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUDIALECT_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 1f90dcb4bf55ad..5825ef9195b03f 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -9,13 +9,10 @@
#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
#define MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
-include "mlir/IR/AttrTypeBase.td"
include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
include "mlir/Dialect/XeGPU/IR/XeGPUTypes.td"
-include "mlir/Interfaces/ShapedOpInterfaces.td"
-include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/ViewLikeInterface.td"
+
// Base class for dialect operations. This operation inherits from the base
// `Op` class in OpBase.td, and provides:
@@ -23,291 +20,7 @@ include "mlir/Interfaces/ViewLikeInterface.td"
// * The mnemonic for the operation, or the name without the dialect prefix.
// * A list of traits for the operation.
class XeGPU_Op<string mnemonic, list<Trait> traits = []>:
- Op<XeGPU_Dialect, mnemonic, traits> {
-
- code extraBaseClassDeclaration = [{
- void printProperties(::mlir::MLIRContext *ctx,
- ::mlir::OpAsmPrinter &p, const Properties &prop) {
- Attribute propAttr = getPropertiesAsAttr(ctx, prop);
- if (propAttr)
- p << "<" << propAttr << ">";
- }
-
- static ::mlir::ParseResult parseProperties(::mlir::OpAsmParser &parser,
- ::mlir::OperationState &result) {
- if (mlir::succeeded(parser.parseLess())) {
- if (parser.parseAttribute(result.propertiesAttr) || parser.parseGreater())
- return failure();
- }
- return success();
- }
-
- }];
-}
-
-
-def XeGPU_CreateNdDescOp: XeGPU_Op<"create_nd_tdesc", [Pure, ViewLikeOpInterface,
- AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
-
- let summary = "Create nd-tensor descriptor operation";
- let description = [{
- The "create_nd_tdesc" operation creates a TensorDescType which represents
- a sub-view of a 2D memory region (It can be extended to support n-D memory
- region if needed in future). Elements in the subview continuous in each
- dimention. It encodes the following important information for supporting
- Intel hardware features:
-
- * source: an object representing (starting address/pointer of) a 2D memory region.
- It can be either a 2D memref object, or simply a pointer represented by uint64_t type.
- for the later case, the shape and layout information of the 2D memory region should
- be explicitly passed via `dynamic_shape` and `dynamic_strides` parameters.
- * offsets: two index values represents offsets from the "source" at the each dimension
- at which the subview of the target memory will be created. It is encoded via two
- variables, including "dynamic_offsets" and "static_offsets", such that it can
- accept various forms, such as, operands (e.g., [%c0, %c]) and attributes (e.g., [2, 4])).
- * shape: the shape information of the memory region pointed by the "source". It is
- typically encoded via the MemRefType of the source, e.g., memref<4096x4096xf16>.
- But if "source" is simply a pointer represented as uint64_t type, or a memref
- type without shape information e.g., memref<?x?xf16>, the shape information has
- to be explicitly passed via the "dynamic_shape" argument. Currently "dynamic_shape"
- only accepts operands(e.g., [%c4096, %c4096]), not attributes(e.g., [4096, 4096]).
- * strides: the strides of the memory region pointed by the "source". Similar to shape,
- it is typically encoded via the MemRefType of the source too. But if "source" is
- simply a pointer represented as uint64_t type, or a memref type without shape
- information e.g., memref<?x?xf16>, the strides information has to be explicitly
- passed via the "dynamic_strides" argument. And it currently only accepts operands two.
-
- Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
- %0 = memref.alloc() : memref<1024x1024xf32>
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %1 = xegpu.create_nd_tdesc %0[%c0, %c0]: memref<1024x1024xf32> -> TensorDesc<8x16xf32>
-
- Example 2 (suppose the tensor shape inferred by the compiler is 8x16):
- %0 = memref.alloc(%h, %w) : memref<?x?xf32>
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: memref<?x?xf32> -> TensorDesc<8x16xf32>
-
- Example 3 (suppose the tensor shape inferred by the compiler is 8x16):
- %0 = ... : ui64
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %1 = xegpu.create_nd_tdesc %0[%c0, %c0], [%h, %w], [%w, %c1]: ui64 -> TensorDesc<8x16xf32>
- }];
-
- let arguments = (ins
- XeGPU_BaseAddrType: $source,
- Variadic<Index>: $offsets,
- Variadic<Index>: $shape,
- Variadic<Index>: $strides,
- DenseI64ArrayAttr: $static_offsets
- );
- let results = (outs XeGPU_TensorDesc: $TensorDesc);
-
- let assemblyFormat = [{
- $source ``
- custom<DynamicIndexList>($offsets, $static_offsets)
- (`,` `[` $shape^ `]` `,` `[` $strides `]`)?
- attr-dict `:` type($source) `->` qualified(type($TensorDesc))
- }];
-
- let hasVerifier = 1;
-
- let builders = [
- OpBuilder<(ins "Type": $tdesc, "TypedValue<MemRefType>": $source,
- "llvm::ArrayRef<OpFoldResult>": $offsets)>,
-
- OpBuilder<(ins "Type": $tdesc, "TypedValue<IntegerType> ": $source,
- "llvm::ArrayRef<OpFoldResult>": $offsets,
- "ValueRange": $shape, "ValueRange": $stride)>
- ];
-
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- /// Returns the type of the source memref operand.
- Type getSourceType() {
- return getSource().getType();
- }
-
- /// Returns the type of the result TensorDesc.
- xegpu::TensorDescType getType() {
- return getTensorDesc().getType();
- }
-
- /// Return the element type of the TensorDesc
- Type getElementType() {
- return getType().getElementType();
- }
-
- /// Return the shape of the TensorDesc
- llvm::ArrayRef<int64_t> getTensorDescShape() {
- return getType().getShape();
- }
-
- /// wrapper for matching with OffsetSizeAndStrideOpInterface
- OperandRange getSizes() {
- return getShape();
- }
-
- /// wrapper for matching with OffsetSizeAndStrideOpInterface
- /// If source is IntegerType and `shape` is filled, it will
- /// return an array of ShapedType::kDynamic representing dynamic
- /// shape encoded in the `shape` argument will be used. Presence
- /// of `shape` overides static shape from source memref type.
- SmallVector<int64_t> getStaticSizes() {
- if (getSourceType().isa<IntegerType>() || getShape().size()) {
- auto dims = getMixedOffsets().size();
- return SmallVector<int64_t>(dims, ShapedType::kDynamic);
- }
- auto memrefType = getSourceType().dyn_cast<MemRefType>();
- return SmallVector<int64_t>(memrefType.getShape());
- }
-
- /// wrapper for matching with OffsetSizeAndStrideOpInterface
- /// If source is IntegerType or `strides` is filled, it will
- /// return an array of ShapedType::kDynamic representing dynamic
- /// strides encoded in the `strides` argument will be used. Presence
- /// of `strides` overides static strides from source memref type.
- SmallVector<int64_t> getStaticStrides() {
- if (getSourceType().isa<IntegerType>() || getStrides().size()) {
- auto dims = getMixedOffsets().size();
- return SmallVector<int64_t>(dims, ShapedType::kDynamic);
- }
- auto memrefType = getSourceType().dyn_cast<MemRefType>();
- auto [strides, offset] = getStridesAndOffset(memrefType);
- return strides;
- }
-
- /// Return the expected rank of each of the`static_offsets`,
- /// `static_shape` and `static_strides` attributes.
- std::array<unsigned, 3> getArrayAttrMaxRanks() {
- unsigned rank;
- if (auto ty = getSourceType().dyn_cast<MemRefType>()) {
- rank = ty.getRank();
- } else {
- rank = (unsigned)getMixedOffsets().size();
- }
- return {rank, rank, rank};
- }
-
- /// Return the number of leading operands before the `offsets`,
- /// `shape` and `strides` operands.
- static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 1; }
-
- mlir::Value getViewSource() { return getSource(); }
- }];
-}
-
-def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
- let summary = "prefetches a nD block to cache";
- let description = [{
- It issues an instruction to prefetch the data from memory to each
- level of the cache based on their cache policy.
-
- Example:
- ```
- xegpu.prefetch_nd %tdesc {l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<cached>,
- l3_hint = #xegpu.cache_hint<cached>}
- : !xegpu.tensor_desc<8x16xf16>
- ```
-
- }];
-
- let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let extraClassDeclaration = extraBaseClassDeclaration;
-
- let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc))";
-}
-
-
-def XeGPU_LoadNdOp : XeGPU_Op<"load_nd"> {
- let summary = "loads a n-D block from memory (represented by TensorDesc)"
- "to registers (represented by vector)";
- let description = [{
- LoadNdOp essentially mimics the hardware block read instruction to read
- a block of data from memory to register. It takes a set of optional cache
- hints for each level of cache, L1, L2 and L3. If hardware does not have a
- correspoding cache, Corresponding cache hint attribute will be masked.
- vnni transform is an hardware feature for Intel GPU, which is used to
- do data packing during the load for B operand of matrix operation, if
- the bit width of the data type is less then 32 bits, e.g., fp16. And
- transpose is another Intel hardware feature, which will do transpose
- operation when loading the data if the bit width of the data type is
- fp32 or fp64. It implies that vnni and transpose cannot exit at the
- same time.
-
- Example:
- ```
- xegpu.load_nd %1 {transpose = [1, 0],
- l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<streaming>}
- : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
- ```
-
-
- }];
-
- let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<I64Attr>: $vnni_axis,
- OptionalAttr<DenseI64ArrayAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let results = (outs XeGPU_ValueType: $value);
-
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- VectorType getType() {
- return llvm::dyn_cast<VectorType>(getValue().getType());
- }
-
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
- }];
-
- let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
- let hasVerifier = 1;
-}
-
-def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", []> {
- let summary = "stores a n-D block register region back to memory, currently only supports 2D";
-
- let description = [{
- StoreNdOp essentially mimics the hardware block write instruction io
- write a block of data from register into the memory region as described
- by the TensorDesc. It takes a set of optional cache hints for each level
- of cache, L1, L2 and L3. If hardware does not have a correspoding cache,
- Corresponding cache hint attribute will be masked.
-
- Example:
- ```
- xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
- ```
-
-
- }];
-
- let arguments = (ins XeGPU_ValueType: $value,
- XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let extraClassDeclaration = extraBaseClassDeclaration;
+ Op<XeGPU_Dialect, mnemonic, traits>;
- let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
- `:` type($value) `,` qualified(type($TensorDesc))}];
- let hasVerifier = 1;
-}
#endif // MLIR_DIALECT_XEGPU_IR_XEGPUOPS_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
index 19ac1693712dd8..1d75bb4e2906fe 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -9,9 +9,9 @@
#ifndef MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
#define MLIR_DIALECT_XEGPU_IR_XEGPUTYPES_TD
+include "mlir/IR/BuiltinTypes.td"
include "mlir/Dialect/XeGPU/IR/XeGPUAttrs.td"
include "mlir/Dialect/XeGPU/IR/XeGPUDialect.td"
-include "mlir/IR/BuiltinTypes.td"
def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>;
def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>;
@@ -30,106 +30,4 @@ class XeGPUTypeDef<string name, string typeMnemonic, list<Trait> traits = [],
let mnemonic = typeMnemonic;
}
-def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
- [ShapedTypeInterface], "::mlir::TensorType"> {
- let summary = "TensorDesc describing regions of interested data.";
- let description = [{
- TensorDesc is a type designed to describe regions of the interested data as well as some
- features that are unique to Intel hardware. Different with the builtin tensor type in MLIR,
- it essentially only contains the meta data, and doesn't hold the data by itself. It is designed
- to mainly support 2D block load/store and DPAS (matrix multiplication instruction) on Intel GPU.
- It encodes the following information:
-
- * shape: the sizes/shape of the intereted data block, e.g., 8x16 means 8 rows
- and each row contains 16 contiguous data element. The rows could be
- either contiguous or not, depends on whether the encoding attribute
- is set or not.
- * element_type: the data type of the data element, e.g., f16, f32.
-
- Similar to the builtin tensor, it also provides an optinal attribute to encoding
- the following information via the TensorDescAttr object:
- * memory_scope (xegpu::MemoryScope): [optional] where the data is located,
- global memory or shared memory. It is default to Global.
- * array_length (int): [optional] The number of contiguous blocks with size as `shape`,
- that will be loaded by block load at a time. It is default to 1.
- ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/85653
More information about the Mlir-commits
mailing list