[Mlir-commits] [mlir] [mlir][XeGPU] Add MemoryEffectsOpInterface for XeGPU memory related ops. (PR #125314)
Charitha Saumya
llvmlistbot at llvm.org
Mon Feb 3 09:51:44 PST 2025
https://github.com/charithaintc updated https://github.com/llvm/llvm-project/pull/125314
From ece99af0a9c42390ad77ffb59d6e4473f2fd3644 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 31 Jan 2025 22:55:50 +0000
Subject: [PATCH 1/3] add mem side effects interface
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 393 +++++++++---------
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 26 +-
2 files changed, 228 insertions(+), 191 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index c2335eecc3781d..d98aa9ffb26f1a 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -276,97 +276,103 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
}
-def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [AllElementTypesMatch<["value", "TensorDesc"]>]> {
- let summary = "loads an n-D block from memory (represented by TensorDesc) "
- "to registers (represented by vector)";
- let description = [{
- LoadNdOp essentially mimics the hardware block read instruction to read
- a block of data from memory to register. It takes a set of optional cache
- hints for each level of cache, L1, L2 and L3. If the hardware does not have
- a corresponding cache, the corresponding cache hint attribute will be masked.
- VNNI transformation is a hardware feature of Intel GPUs that packs data
- during the load for the B operand of a matrix operation when the bit width
- of the data type is less than 32 bits, e.g., fp16. Transpose is another
- Intel hardware feature that transposes the data while loading it when the
- data type is fp32 or fp64. This implies that vnni and transpose cannot
- exist at the same time.
-
- Example:
- ```mlir
- xegpu.load_nd %1 {transpose = [1, 0],
- l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<streaming>}
- : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
- ```
-
-
- }];
-
- let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<UnitAttr>: $packed,
- OptionalAttr<DenseI64ArrayAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let results = (outs XeGPU_ValueType: $value);
-
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- VectorType getType() {
- return llvm::dyn_cast<VectorType>(getValue().getType());
- }
+def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
+ AllElementTypesMatch<["value", "TensorDesc"]>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ ]> {
+ let summary = "loads an n-D block from memory (represented by TensorDesc) "
+ "to registers (represented by vector)";
+ let description = [{
+ LoadNdOp essentially mimics the hardware block read instruction to read
+ a block of data from memory to register. It takes a set of optional cache
+ hints for each level of cache, L1, L2 and L3. If the hardware does not have
+ a corresponding cache, the corresponding cache hint attribute will be masked.
+ VNNI transformation is a hardware feature of Intel GPUs that packs data
+ during the load for the B operand of a matrix operation when the bit width
+ of the data type is less than 32 bits, e.g., fp16. Transpose is another
+ Intel hardware feature that transposes the data while loading it when the
+ data type is fp32 or fp64. This implies that vnni and transpose cannot
+ exist at the same time.
+
+ Example:
+ ```mlir
+ xegpu.load_nd %1 {transpose = [1, 0],
+ l1_hint = #xegpu.cache_hint<cached>,
+ l2_hint = #xegpu.cache_hint<uncached>,
+ l3_hint = #xegpu.cache_hint<streaming>}
+ : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
+ ```
+
+
+ }];
+
+ let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+ OptionalAttr<UnitAttr>: $packed,
+ OptionalAttr<DenseI64ArrayAttr>: $transpose,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+ let results = (outs XeGPU_ValueType: $value);
+
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ VectorType getType() {
+ return llvm::dyn_cast<VectorType>(getValue().getType());
+ }
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
- }];
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
+ }];
- let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
- let hasVerifier = 1;
+ let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
+ let hasVerifier = 1;
}
-def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [AllElementTypesMatch<["value", "TensorDesc"]>]> {
- let summary = "stores an n-D block register region back to memory, currently only supports 2D";
-
- let description = [{
- StoreNdOp essentially mimics the hardware block write instruction to
- write a block of data from register into the memory region as described
- by the TensorDesc. It takes a set of optional cache hints for each level
- of cache, L1, L2 and L3. If the hardware does not have a corresponding
- cache, the corresponding cache hint attribute will be masked.
-
- Example:
- ```mlir
- xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
- ```
-
-
- }];
-
- let arguments = (ins XeGPU_ValueType: $value,
- XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- VectorType getValueType() {
- return llvm::dyn_cast<VectorType>(getValue().getType());
- }
+def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
+ AllElementTypesMatch<["value", "TensorDesc"]>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ ]> {
+ let summary = "stores an n-D block register region back to memory, currently only supports 2D";
+
+ let description = [{
+ StoreNdOp essentially mimics the hardware block write instruction to
+ write a block of data from register into the memory region as described
+ by the TensorDesc. It takes a set of optional cache hints for each level
+ of cache, L1, L2 and L3. If the hardware does not have a corresponding
+ cache, the corresponding cache hint attribute will be masked.
+
+ Example:
+ ```mlir
+ xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
+ l2_hint = #xegpu.cache_hint<write_back>,
+ l3_hint = #xegpu.cache_hint<write_through>}
+ : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
+ ```
+
+
+ }];
+
+ let arguments = (ins XeGPU_ValueType: $value,
+ XeGPU_TensorDesc: $TensorDesc,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ VectorType getValueType() {
+ return llvm::dyn_cast<VectorType>(getValue().getType());
+ }
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
- }];
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
+ }];
- let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
- `:` type($value) `,` qualified(type($TensorDesc))}];
- let hasVerifier = 1;
+ let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
+ `:` type($value) `,` qualified(type($TensorDesc))}];
+ let hasVerifier = 1;
}
def XeGPU_UpdateNdOffsetOp : XeGPU_Op<"update_nd_offset",
@@ -548,131 +554,138 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
let hasVerifier = 1;
}
-def XeGPU_LoadGatherOp : XeGPU_Op<"load", [AllElementTypesMatch<["value", "TensorDesc"]>]> {
- let summary = "load a set of scattered data points from memory.";
-
- let description = [{ It (aka. load) loads data for each work-item. The output
- describes the data being loaded at the subgroup level, so its size is
- consistent with the number of work-items in a subgroup. When the chunk size
- is larger than 2, the output vector is a 2D vector, with dim-1 corresponding
- to work-items, and dim-0 corresponding to the chunk size loaded by each work-item.
- Notably, there is a transpose effect on the result (as compared to the TensorDesc)
- due to the hardware implementation. Therefore, a transpose attribute is introduced
- on purpose, making sure users are aware of this implicit transformation.
-
- The mask operand masks out memory access so that it is safe to pass out-of-boundary
- addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
-
- Example 1:
- ```mlir
- %2 = xegpu.load %1, %0 {l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<uncached>}
- : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<memory_space=global>>,
- vector<16xi1> -> vector<16xf32>
- ```
+def XeGPU_LoadGatherOp : XeGPU_Op<"load", [
+ AllElementTypesMatch<["value", "TensorDesc"]>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ ]> {
+ let summary = "load a set of scattered data points from memory.";
+
+ let description = [{ It (aka. load) loads data for each work-item. The output
+ describes the data being loaded at the subgroup level, so its size is
+ consistent with the number of work-items in a subgroup. When the chunk size
+ is larger than 2, the output vector is a 2D vector, with dim-1 corresponding
+ to work-items, and dim-0 corresponding to the chunk size loaded by each work-item.
+ Notably, there is a transpose effect on the result (as compared to the TensorDesc)
+ due to the hardware implementation. Therefore, a transpose attribute is introduced
+ on purpose, making sure users are aware of this implicit transformation.
+
+ The mask operand masks out memory access so that it is safe to pass out-of-boundary
+ addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
+
+ Example 1:
+ ```mlir
+ %2 = xegpu.load %1, %0 {l1_hint = #xegpu.cache_hint<cached>,
+ l2_hint = #xegpu.cache_hint<uncached>,
+ l3_hint = #xegpu.cache_hint<uncached>}
+ : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<memory_space=global>>,
+ vector<16xi1> -> vector<16xf32>
+ ```
- Example 2:
- ```mlir
- %2 = xegpu.load %1, %0 {transpose,
- l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<uncached>}
- : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>,
- vector<16xi1> -> vector<8x16xf32>
- ```
+ Example 2:
+ ```mlir
+ %2 = xegpu.load %1, %0 {transpose,
+ l1_hint = #xegpu.cache_hint<cached>,
+ l2_hint = #xegpu.cache_hint<uncached>,
+ l3_hint = #xegpu.cache_hint<uncached>}
+ : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>,
+ vector<16xi1> -> vector<8x16xf32>
+ ```
- }];
+ }];
- let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
- XeGPU_MaskType: $mask,
- OptionalAttr<UnitAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
- let results = (outs XeGPU_ValueType: $value);
+ let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+ XeGPU_MaskType: $mask,
+ OptionalAttr<UnitAttr>: $transpose,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+ let results = (outs XeGPU_ValueType: $value);
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
- mlir::Type getElementType() {
- auto type = getValue().getType();
- return getElementTypeOrSelf(type);
- }
+ mlir::Type getElementType() {
+ auto type = getValue().getType();
+ return getElementTypeOrSelf(type);
+ }
- Type getValueType() {
- return getValue().getType();
- }
+ Type getValueType() {
+ return getValue().getType();
+ }
- Type getMaskType() {
- return getMask().getType();
- }
+ Type getMaskType() {
+ return getMask().getType();
+ }
- }];
+ }];
- let assemblyFormat = [{$TensorDesc `,` $mask prop-dict attr-dict
- `:` qualified(type($TensorDesc)) `,` type($mask) `->` type($value)}];
+ let assemblyFormat = [{$TensorDesc `,` $mask prop-dict attr-dict
+ `:` qualified(type($TensorDesc)) `,` type($mask) `->` type($value)}];
- let hasVerifier = 1;
+ let hasVerifier = 1;
}
-def XeGPU_StoreScatterOp : XeGPU_Op<"store", [AllElementTypesMatch<["value", "TensorDesc"]>]> {
- let summary = "store data to scattered memory locations.";
- let description = [{ It (aka. store) stores data to scattered memory locations. The value is
- typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
- a 2D vector instead. For the latter case, dim-1 of the value corresponds to the SIMD lanes
- and dim-0 of the value corresponds to the chunk size stored per lane. So `store_scatter`
- has a transpose effect, similar to `load_gather`. Therefore, a transpose attribute is
- introduced on purpose, making sure users are aware of this implicit transformation.
-
- Example 1:
- ```mlir
- %3 = xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scattered_tdesc_attr<>>, vector<16xi1>
- ```
+def XeGPU_StoreScatterOp : XeGPU_Op<"store",
+ [
+ AllElementTypesMatch<["value", "TensorDesc"]>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ ]> {
+ let summary = "store data to scattered memory locations.";
+ let description = [{ It (aka. store) stores data to scattered memory locations. The value is
+ typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
+ a 2D vector instead. For the latter case, dim-1 of the value corresponds to the SIMD lanes
+ and dim-0 of the value corresponds to the chunk size stored per lane. So `store_scatter`
+ has a transpose effect, similar to `load_gather`. Therefore, a transpose attribute is
+ introduced on purpose, making sure users are aware of this implicit transformation.
+
+ Example 1:
+ ```mlir
+ %3 = xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint<uncached>,
+ l2_hint = #xegpu.cache_hint<write_back>,
+ l3_hint = #xegpu.cache_hint<write_through>}
+ : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scattered_tdesc_attr<>>, vector<16xi1>
+ ```
- Example 2:
- ```mlir
- %3 = xegpu.store %0, %1, %2 {transpose,
- l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<8x16xf32>, !xegpu.tensor_desc<16x8xf32, #xegpu.scattered_tdesc_attr<chunk_size=8>>, vector<16xi1>
- ```
+ Example 2:
+ ```mlir
+ %3 = xegpu.store %0, %1, %2 {transpose,
+ l1_hint = #xegpu.cache_hint<uncached>,
+ l2_hint = #xegpu.cache_hint<write_back>,
+ l3_hint = #xegpu.cache_hint<write_through>}
+ : vector<8x16xf32>, !xegpu.tensor_desc<16x8xf32, #xegpu.scattered_tdesc_attr<chunk_size=8>>, vector<16xi1>
+ ```
- }];
+ }];
- let arguments = (ins
- XeGPU_ValueType: $value,
- XeGPU_TensorDesc: $TensorDesc,
- XeGPU_MaskType: $mask,
- OptionalAttr<UnitAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+ let arguments = (ins
+ XeGPU_ValueType: $value,
+ XeGPU_TensorDesc: $TensorDesc,
+ XeGPU_MaskType: $mask,
+ OptionalAttr<UnitAttr>: $transpose,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
- Type getValueType() {
- return getValue().getType();
- }
+ Type getValueType() {
+ return getValue().getType();
+ }
- Type getMaskType() {
- return getMask().getType();
- }
- }];
+ Type getMaskType() {
+ return getMask().getType();
+ }
+ }];
- let assemblyFormat = [{$value `,` $TensorDesc `,` $mask prop-dict attr-dict
- `:` type($value) `,` qualified(type($TensorDesc)) `,` type($mask)}];
+ let assemblyFormat = [{$value `,` $TensorDesc `,` $mask prop-dict attr-dict
+ `:` type($value) `,` qualified(type($TensorDesc)) `,` type($mask)}];
- let hasVerifier = 1;
+ let hasVerifier = 1;
}
def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset",
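The `DeclareOpInterfaceMethods<MemoryEffectsOpInterface>` entries above only make ODS declare the `getEffects` hook on each op; the definitions are supplied by hand in XeGPUOps.cpp below. Once defined, generic passes can discover the effects through the interface. A minimal sketch of the consumer side (the helper name `reportsRead` is illustrative, not part of this patch):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

// Returns true if `op` reports a memory read through the interface added by
// this patch (ODS name MemoryEffectsOpInterface, C++ class
// MemoryEffectOpInterface).
static bool reportsRead(mlir::Operation *op) {
  auto iface = llvm::dyn_cast<mlir::MemoryEffectOpInterface>(op);
  if (!iface)
    return false; // Op exposes no effect information.
  llvm::SmallVector<mlir::MemoryEffects::EffectInstance> effects;
  iface.getEffects(effects);
  return llvm::any_of(effects, [](const auto &effect) {
    return llvm::isa<mlir::MemoryEffects::Read>(effect.getEffect());
  });
}
```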
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index cd883baa986b85..d015e5772a94f2 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -324,6 +324,12 @@ LogicalResult LoadNdOp::verify() {
return success();
}
+void LoadNdOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ effects.emplace_back(MemoryEffects::Read::get());
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_StoreNdOp
//===----------------------------------------------------------------------===//
@@ -361,6 +367,12 @@ LogicalResult StoreNdOp::verify() {
return success();
}
+void StoreNdOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ effects.emplace_back(MemoryEffects::Write::get());
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_UpdateNDOffsetOp
//===----------------------------------------------------------------------===//
@@ -494,7 +506,7 @@ LogicalResult PrefetchOp::verify() {
}
//===----------------------------------------------------------------------===//
-// XeGPU_LoadGatherOp
+// XeGPU_jrOp
//===----------------------------------------------------------------------===//
LogicalResult LoadGatherOp::verify() {
auto tdescTy = getTensorDescType();
@@ -553,6 +565,12 @@ LogicalResult LoadGatherOp::verify() {
return success();
}
+void LoadGatherOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ effects.emplace_back(MemoryEffects::Read::get());
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_StoreScatterOp
//===----------------------------------------------------------------------===//
@@ -605,6 +623,12 @@ LogicalResult StoreScatterOp::verify() {
return success();
}
+void StoreScatterOp::getEffects(
+ SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
+ &effects) {
+ effects.emplace_back(MemoryEffects::Write::get());
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_UpdateOffsetOp
//===----------------------------------------------------------------------===//
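With these definitions in place, the generic side-effect utilities start working for the XeGPU memory ops. For example (a sketch over a hypothetical worklist; not part of this patch), an `xegpu.load_nd` whose results have no users is now trivially dead, while an `xegpu.store_nd` never is, because it reports a write:

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

// Erase trivially dead ops. After this patch, an xegpu.load_nd with unused
// results qualifies (it only reads), while xegpu.store_nd is always kept
// (it reports MemoryEffects::Write).
static void pruneDeadOps(llvm::ArrayRef<mlir::Operation *> worklist) {
  for (mlir::Operation *op : worklist)
    if (mlir::isOpTriviallyDead(op))
      op->erase();
}
```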
From 1be0ae30b85e6270756e41fbc939edc3f42ba7e9 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Fri, 31 Jan 2025 23:18:30 +0000
Subject: [PATCH 2/3] add mem side effects interface
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 390 +++++++++---------
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 2 +-
2 files changed, 194 insertions(+), 198 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index d98aa9ffb26f1a..0ff723005d4359 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -277,102 +277,101 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
- AllElementTypesMatch<["value", "TensorDesc"]>,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ AllElementTypesMatch<["value", "TensorDesc"]>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
]> {
- let summary = "loads an n-D block from memory (represented by TensorDesc) "
- "to registers (represented by vector)";
- let description = [{
- LoadNdOp essentially mimics the hardware block read instruction to read
- a block of data from memory to register. It takes a set of optional cache
- hints for each level of cache, L1, L2 and L3. If the hardware does not have
- a corresponding cache, the corresponding cache hint attribute will be masked.
- VNNI transformation is a hardware feature of Intel GPUs that packs data
- during the load for the B operand of a matrix operation when the bit width
- of the data type is less than 32 bits, e.g., fp16. Transpose is another
- Intel hardware feature that transposes the data while loading it when the
- data type is fp32 or fp64. This implies that vnni and transpose cannot
- exist at the same time.
-
- Example:
- ```mlir
- xegpu.load_nd %1 {transpose = [1, 0],
- l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<streaming>}
- : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
- ```
-
-
- }];
-
- let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<UnitAttr>: $packed,
- OptionalAttr<DenseI64ArrayAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let results = (outs XeGPU_ValueType: $value);
-
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- VectorType getType() {
- return llvm::dyn_cast<VectorType>(getValue().getType());
- }
+ let summary = "loads an n-D block from memory (represented by TensorDesc) "
+ "to registers (represented by vector)";
+ let description = [{
+ LoadNdOp essentially mimics the hardware block read instruction to read
+ a block of data from memory to register. It takes a set of optional cache
+ hints for each level of cache, L1, L2 and L3. If the hardware does not have
+ a corresponding cache, the corresponding cache hint attribute will be masked.
+ VNNI transformation is a hardware feature of Intel GPUs that packs data
+ during the load for the B operand of a matrix operation when the bit width
+ of the data type is less than 32 bits, e.g., fp16. Transpose is another
+ Intel hardware feature that transposes the data while loading it when the
+ data type is fp32 or fp64. This implies that vnni and transpose cannot
+ exist at the same time.
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
- }];
+ Example:
+ ```mlir
+ xegpu.load_nd %1 {transpose = [1, 0],
+ l1_hint = #xegpu.cache_hint<cached>,
+ l2_hint = #xegpu.cache_hint<uncached>,
+ l3_hint = #xegpu.cache_hint<streaming>}
+ : !xegpu.tensor_desc<8x16xf32> -> vector<16x8xf32>
+ ```
+
+
+ }];
+
+ let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+ OptionalAttr<UnitAttr>: $packed,
+ OptionalAttr<DenseI64ArrayAttr>: $transpose,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+ let results = (outs XeGPU_ValueType: $value);
+
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ VectorType getType() {
+ return llvm::dyn_cast<VectorType>(getValue().getType());
+ }
- let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
- let hasVerifier = 1;
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
+ }];
+
+ let assemblyFormat = "$TensorDesc prop-dict attr-dict `:` qualified(type($TensorDesc)) `->` type($value)";
+ let hasVerifier = 1;
}
def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
- AllElementTypesMatch<["value", "TensorDesc"]>,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ AllElementTypesMatch<["value", "TensorDesc"]>, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
]> {
- let summary = "stores an n-D block register region back to memory, currently only supports 2D";
-
- let description = [{
- StoreNdOp essentially mimics the hardware block write instruction to
- write a block of data from register into the memory region as described
- by the TensorDesc. It takes a set of optional cache hints for each level
- of cache, L1, L2 and L3. If the hardware does not have a corresponding
- cache, the corresponding cache hint attribute will be masked.
-
- Example:
- ```mlir
- xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
- ```
-
-
- }];
-
- let arguments = (ins XeGPU_ValueType: $value,
- XeGPU_TensorDesc: $TensorDesc,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
-
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- VectorType getValueType() {
- return llvm::dyn_cast<VectorType>(getValue().getType());
- }
+ let summary = "stores an n-D block register region back to memory, currently only supports 2D";
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
- }];
+ let description = [{
+ StoreNdOp essentially mimics the hardware block write instruction to
+ write a block of data from register into the memory region as described
+ by the TensorDesc. It takes a set of optional cache hints for each level
+ of cache, L1, L2 and L3. If the hardware does not have a corresponding
+ cache, the corresponding cache hint attribute will be masked.
+
+ Example:
+ ```mlir
+ xegpu.store_nd %3, %2 {l1_hint = #xegpu.cache_hint<uncached>,
+ l2_hint = #xegpu.cache_hint<write_back>,
+ l3_hint = #xegpu.cache_hint<write_through>}
+ : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16>
+ ```
+
+
+ }];
+
+ let arguments = (ins XeGPU_ValueType: $value,
+ XeGPU_TensorDesc: $TensorDesc,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ VectorType getValueType() {
+ return llvm::dyn_cast<VectorType>(getValue().getType());
+ }
+
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
+ }];
- let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
- `:` type($value) `,` qualified(type($TensorDesc))}];
- let hasVerifier = 1;
+ let assemblyFormat = [{$value `,` $TensorDesc prop-dict attr-dict
+ `:` type($value) `,` qualified(type($TensorDesc))}];
+ let hasVerifier = 1;
}
def XeGPU_UpdateNdOffsetOp : XeGPU_Op<"update_nd_offset",
@@ -555,137 +554,134 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
}
def XeGPU_LoadGatherOp : XeGPU_Op<"load", [
- AllElementTypesMatch<["value", "TensorDesc"]>,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ AllElementTypesMatch<["value", "TensorDesc"]>,
+ DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
]> {
- let summary = "load a set of scattered data points from memory.";
-
- let description = [{ It (aka. load) loads data for each work-item. The output
- describes the data being loaded at the subgroup level, so its size is
- consistent with the number of work-items in a subgroup. When the chunk size
- is larger than 2, the output vector is a 2D vector, with dim-1 corresponding
- to work-items, and dim-0 corresponding to the chunk size loaded by each work-item.
- Notably, there is a transpose effect on the result (as compared to the TensorDesc)
- due to the hardware implementation. Therefore, a transpose attribute is introduced
- on purpose, making sure users are aware of this implicit transformation.
-
- The mask operand masks out memory access so that it is safe to pass out-of-boundary
- addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
-
- Example 1:
- ```mlir
- %2 = xegpu.load %1, %0 {l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<uncached>}
- : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<memory_space=global>>,
- vector<16xi1> -> vector<16xf32>
- ```
+ let summary = "load a set of scattered data points from memory.";
+
+ let description = [{ It (aka. load) loads data for each work-item. The output
+ describes the data being loaded at the subgroup level, so its size is
+ consistent with the number of work-items in a subgroup. When the chunk size
+ is larger than 2, the output vector is a 2D vector, with dim-1 corresponding
+ to work-items, and dim-0 corresponding to the chunk size loaded by each work-item.
+ Notably, there is a transpose effect on the result (as compared to the TensorDesc)
+ due to the hardware implementation. Therefore, a transpose attribute is introduced
+ on purpose, making sure users are aware of this implicit transformation.
+
+ The mask operand masks out memory access so that it is safe to pass out-of-boundary
+ addresses/offsets as long as they are masked. It applies to slots of SIMD lanes.
+
+ Example 1:
+ ```mlir
+ %2 = xegpu.load %1, %0 {l1_hint = #xegpu.cache_hint<cached>,
+ l2_hint = #xegpu.cache_hint<uncached>,
+ l3_hint = #xegpu.cache_hint<uncached>}
+ : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<memory_space=global>>,
+ vector<16xi1> -> vector<16xf32>
+ ```
- Example 2:
- ```mlir
- %2 = xegpu.load %1, %0 {transpose,
- l1_hint = #xegpu.cache_hint<cached>,
- l2_hint = #xegpu.cache_hint<uncached>,
- l3_hint = #xegpu.cache_hint<uncached>}
- : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>,
- vector<16xi1> -> vector<8x16xf32>
- ```
+ Example 2:
+ ```mlir
+ %2 = xegpu.load %1, %0 {transpose,
+ l1_hint = #xegpu.cache_hint<cached>,
+ l2_hint = #xegpu.cache_hint<uncached>,
+ l3_hint = #xegpu.cache_hint<uncached>}
+ : !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<memory_space=global, chunk_size=8>>,
+ vector<16xi1> -> vector<8x16xf32>
+ ```
- }];
+ }];
- let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
- XeGPU_MaskType: $mask,
- OptionalAttr<UnitAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
- let results = (outs XeGPU_ValueType: $value);
+ let arguments = (ins XeGPU_TensorDesc: $TensorDesc,
+ XeGPU_MaskType: $mask,
+ OptionalAttr<UnitAttr>: $transpose,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+ let results = (outs XeGPU_ValueType: $value);
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
- mlir::Type getElementType() {
- auto type = getValue().getType();
- return getElementTypeOrSelf(type);
- }
+ mlir::Type getElementType() {
+ auto type = getValue().getType();
+ return getElementTypeOrSelf(type);
+ }
- Type getValueType() {
- return getValue().getType();
- }
+ Type getValueType() {
+ return getValue().getType();
+ }
- Type getMaskType() {
- return getMask().getType();
- }
+ Type getMaskType() {
+ return getMask().getType();
+ }
- }];
+ }];
- let assemblyFormat = [{$TensorDesc `,` $mask prop-dict attr-dict
- `:` qualified(type($TensorDesc)) `,` type($mask) `->` type($value)}];
+ let assemblyFormat = [{$TensorDesc `,` $mask prop-dict attr-dict
+ `:` qualified(type($TensorDesc)) `,` type($mask) `->` type($value)}];
- let hasVerifier = 1;
+ let hasVerifier = 1;
}
-def XeGPU_StoreScatterOp : XeGPU_Op<"store",
- [
- AllElementTypesMatch<["value", "TensorDesc"]>,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
- ]> {
- let summary = "store data to scattered memory locations.";
- let description = [{ It (aka. store) stores data to scattered memory locations. The value is
- typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
- a 2D vector instead. For the latter case, dim-1 of the value corresponds to the SIMD lanes
- and dim-0 of the value corresponds to the chunk size stored per lane. So `store_scatter`
- has a transpose effect, similar to `load_gather`. Therefore, a transpose attribute is
- introduced on purpose, making sure users are aware of this implicit transformation.
-
- Example 1:
- ```mlir
- %3 = xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scattered_tdesc_attr<>>, vector<16xi1>
- ```
+def XeGPU_StoreScatterOp : XeGPU_Op<"store", [
+ AllElementTypesMatch<["value", "TensorDesc"]>, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+ let summary = "store data to scattered memory locations.";
+ let description = [{ It (aka. store) stores data to scattered memory locations. The value is
+ typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
+ a 2D vector instead. For the latter case, dim-1 of the value corresponds to the SIMD lanes
+ and dim-0 of the value corresponds to the chunk size stored per lane. So `store_scatter`
+ has a transpose effect, similar to `load_gather`. Therefore, a transpose attribute is
+ introduced on purpose, making sure users are aware of this implicit transformation.
+
+ Example 1:
+ ```mlir
+ %3 = xegpu.store %0, %1, %2 {l1_hint = #xegpu.cache_hint<uncached>,
+ l2_hint = #xegpu.cache_hint<write_back>,
+ l3_hint = #xegpu.cache_hint<write_through>}
+ : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scattered_tdesc_attr<>>, vector<16xi1>
+ ```
- Example 2:
- ```mlir
- %3 = xegpu.store %0, %1, %2 {transpose,
- l1_hint = #xegpu.cache_hint<uncached>,
- l2_hint = #xegpu.cache_hint<write_back>,
- l3_hint = #xegpu.cache_hint<write_through>}
- : vector<8x16xf32>, !xegpu.tensor_desc<16x8xf32, #xegpu.scattered_tdesc_attr<chunk_size=8>>, vector<16xi1>
- ```
+ Example 2:
+ ```mlir
+ %3 = xegpu.store %0, %1, %2 {transpose,
+ l1_hint = #xegpu.cache_hint<uncached>,
+ l2_hint = #xegpu.cache_hint<write_back>,
+ l3_hint = #xegpu.cache_hint<write_through>}
+ : vector<8x16xf32>, !xegpu.tensor_desc<16x8xf32, #xegpu.scattered_tdesc_attr<chunk_size=8>>, vector<16xi1>
+ ```
- }];
+ }];
- let arguments = (ins
- XeGPU_ValueType: $value,
- XeGPU_TensorDesc: $TensorDesc,
- XeGPU_MaskType: $mask,
- OptionalAttr<UnitAttr>: $transpose,
- OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
- OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
+ let arguments = (ins
+ XeGPU_ValueType: $value,
+ XeGPU_TensorDesc: $TensorDesc,
+ XeGPU_MaskType: $mask,
+ OptionalAttr<UnitAttr>: $transpose,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l1_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l2_hint,
+ OptionalAttr<XeGPU_CacheHintAttr>: $l3_hint);
- let extraClassDeclaration = extraBaseClassDeclaration # [{
- xegpu::TensorDescType getTensorDescType() {
- return getTensorDesc().getType();
- }
+ let extraClassDeclaration = extraBaseClassDeclaration # [{
+ xegpu::TensorDescType getTensorDescType() {
+ return getTensorDesc().getType();
+ }
- Type getValueType() {
- return getValue().getType();
- }
+ Type getValueType() {
+ return getValue().getType();
+ }
- Type getMaskType() {
- return getMask().getType();
- }
- }];
+ Type getMaskType() {
+ return getMask().getType();
+ }
+ }];
- let assemblyFormat = [{$value `,` $TensorDesc `,` $mask prop-dict attr-dict
- `:` type($value) `,` qualified(type($TensorDesc)) `,` type($mask)}];
+ let assemblyFormat = [{$value `,` $TensorDesc `,` $mask prop-dict attr-dict
+ `:` type($value) `,` qualified(type($TensorDesc)) `,` type($mask)}];
- let hasVerifier = 1;
+ let hasVerifier = 1;
}
def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset",
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index d015e5772a94f2..443a1347334e23 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -506,7 +506,7 @@ LogicalResult PrefetchOp::verify() {
}
//===----------------------------------------------------------------------===//
-// XeGPU_jrOp
+// XeGPU_LoadGatherOp
//===----------------------------------------------------------------------===//
LogicalResult LoadGatherOp::verify() {
auto tdescTy = getTensorDescType();
From ffae0295ea78ce2ae00af83227063edeb04e4f20 Mon Sep 17 00:00:00 2001
From: Charitha Saumya <charitha.saumya.gusthinna.waduge at intel.com>
Date: Mon, 3 Feb 2025 16:48:54 +0000
Subject: [PATCH 3/3] add mem side effects interface
---
.../include/mlir/Dialect/XeGPU/IR/XeGPUOps.td | 11 ++++-----
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 24 -------------------
2 files changed, 5 insertions(+), 30 deletions(-)
diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
index 0ff723005d4359..7560ede058faa3 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
@@ -277,8 +277,7 @@ def XeGPU_PrefetchNdOp : XeGPU_Op<"prefetch_nd", []> {
def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
- AllElementTypesMatch<["value", "TensorDesc"]>,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemRead]>
]> {
let summary = "loads an n-D block from memory (represented by TensorDesc) "
"to registers (represented by vector)";
@@ -331,7 +330,7 @@ def XeGPU_LoadNdOp : XeGPU_Op<"load_nd", [
}
def XeGPU_StoreNdOp : XeGPU_Op<"store_nd", [
- AllElementTypesMatch<["value", "TensorDesc"]>, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
+ AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemWrite]>
]> {
let summary = "stores an n-D block register region back to memory, currently only supports 2D";
@@ -554,8 +553,7 @@ def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> {
}
def XeGPU_LoadGatherOp : XeGPU_Op<"load", [
- AllElementTypesMatch<["value", "TensorDesc"]>,
- DeclareOpInterfaceMethods<MemoryEffectsOpInterface>
+ AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemRead]>
]> {
let summary = "load a set of scattered data points from memory.";
@@ -627,7 +625,8 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load", [
}
def XeGPU_StoreScatterOp : XeGPU_Op<"store", [
- AllElementTypesMatch<["value", "TensorDesc"]>, DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+ AllElementTypesMatch<["value", "TensorDesc"]>, MemoryEffects<[MemWrite]>
+ ]> {
let summary = "store data to scattered memory locations.";
let description = [{ It (aka. store) stores data to scattered memory locations. The value is
typically a 1D vector. But when the chunk size of the TensorDesc is larger than 1, it will be
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index 443a1347334e23..cd883baa986b85 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -324,12 +324,6 @@ LogicalResult LoadNdOp::verify() {
return success();
}
-void LoadNdOp::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
- effects.emplace_back(MemoryEffects::Read::get());
-}
-
//===----------------------------------------------------------------------===//
// XeGPU_StoreNdOp
//===----------------------------------------------------------------------===//
@@ -367,12 +361,6 @@ LogicalResult StoreNdOp::verify() {
return success();
}
-void StoreNdOp::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
- effects.emplace_back(MemoryEffects::Write::get());
-}
-
//===----------------------------------------------------------------------===//
// XeGPU_UpdateNDOffsetOp
//===----------------------------------------------------------------------===//
@@ -565,12 +553,6 @@ LogicalResult LoadGatherOp::verify() {
return success();
}
-void LoadGatherOp::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
- effects.emplace_back(MemoryEffects::Read::get());
-}
-
//===----------------------------------------------------------------------===//
// XeGPU_StoreScatterOp
//===----------------------------------------------------------------------===//
@@ -623,12 +605,6 @@ LogicalResult StoreScatterOp::verify() {
return success();
}
-void StoreScatterOp::getEffects(
- SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
- &effects) {
- effects.emplace_back(MemoryEffects::Write::get());
-}
-
//===----------------------------------------------------------------------===//
// XeGPU_UpdateOffsetOp
//===----------------------------------------------------------------------===//
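With the trait-based form in place for all four ops, passes can make conservative reordering and elimination decisions without special-casing XeGPU. One possible check (a sketch; the helper name `mayReorder` is illustrative):

```cpp
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

// Two ops may be swapped if at least one of them is free of memory effects;
// a pair such as xegpu.load_nd / xegpu.store_nd is conservatively kept in
// program order.
static bool mayReorder(mlir::Operation *a, mlir::Operation *b) {
  return mlir::isMemoryEffectFree(a) || mlir::isMemoryEffectFree(b);
}
```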