[Mlir-commits] [mlir] ed45c05 - [MLIR][XeGPU] fix load/store/prefetch op offset verifier (#166137)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Nov 4 06:22:05 PST 2025
Author: Tuomas Kärnä
Date: 2025-11-04T15:22:00+01:00
New Revision: ed45c0571eb35339f7c3562edbb3b27d67594acd
URL: https://github.com/llvm/llvm-project/commit/ed45c0571eb35339f7c3562edbb3b27d67594acd
DIFF: https://github.com/llvm/llvm-project/commit/ed45c0571eb35339f7c3562edbb3b27d67594acd.diff
LOG: [MLIR][XeGPU] fix load/store/prefetch op offset verifier (#166137)
The verifier of the `xegpu.{load/store/prefetch}_nd` ops fails if `offset` is a
mix of static and dynamic values, e.g. `offset = [0, %c0]`. In this case
the number of dynamic offsets is 1, which differs from `tDescRank` (= 2), so
the `offsetSize != tDescRank` check rejects the op. Instead, we should check
the length of `getMixedOffsets()`.
Added:
Modified:
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
mlir/test/Dialect/XeGPU/ops.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index c8f5c86c03686..fb51077b5dff3 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -498,11 +498,8 @@ LogicalResult PrefetchNdOp::verify() {
return emitOpError("invalid l3_hint: ") << getL3HintAttr();
int64_t tDescRank = tdescTy.getRank();
- int64_t offsetSize = static_cast<int64_t>(getOffsets().size());
- int64_t constOffsetSize =
- getConstOffsetsAttr() ? getConstOffsetsAttr().size() : 0;
- if (((offsetSize != 0) && (offsetSize != tDescRank)) ||
- ((constOffsetSize != 0) && (constOffsetSize != tDescRank)))
+ int64_t offsetSize = getMixedOffsets().size();
+ if (offsetSize != 0 && offsetSize != tDescRank)
return emitOpError(
"Mismatched ranks between offsets and tensor descriptor");
@@ -623,11 +620,8 @@ LogicalResult LoadNdOp::verify() {
<< tdescTy;
int64_t tDescRank = tdescTy.getRank();
- int64_t offsetSize = static_cast<int64_t>(getOffsets().size());
- int64_t constOffsetSize =
- getConstOffsetsAttr() ? getConstOffsetsAttr().size() : 0;
- if (((offsetSize != 0) && (offsetSize != tDescRank)) ||
- ((constOffsetSize != 0) && (constOffsetSize != tDescRank)))
+ int64_t offsetSize = getMixedOffsets().size();
+ if (offsetSize != 0 && offsetSize != tDescRank)
return emitOpError(
"Mismatched ranks between offsets and tensor descriptor");
@@ -717,11 +711,8 @@ LogicalResult StoreNdOp::verify() {
<< dstTy;
int64_t tDescRank = dstTy.getRank();
- int64_t offsetSize = static_cast<int64_t>(getOffsets().size());
- int64_t constOffsetSize =
- getConstOffsetsAttr() ? getConstOffsetsAttr().size() : 0;
- if (((offsetSize != 0) && (offsetSize != tDescRank)) ||
- ((constOffsetSize != 0) && (constOffsetSize != tDescRank)))
+ int64_t offsetSize = getMixedOffsets().size();
+ if (offsetSize != 0 && offsetSize != tDescRank)
return emitOpError(
"Mismatched ranks between offsets and tensor descriptor");
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index 0a10f6814ae96..9b3829664108d 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -278,6 +278,15 @@ gpu.func @subgroup_load_nd_offset_1(%src: memref<24x32xf32>, %x : index, %y : in
gpu.return
}
+// CHECK: func @subgroup_load_nd_offset_2(%[[arg0:.*]]: memref<24x32xf32>, %arg1: index) {
+gpu.func @subgroup_load_nd_offset_2(%src: memref<24x32xf32>, %x : index) {
+ // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0 : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
+ %1 = xegpu.create_nd_tdesc %src : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
+ // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]][%arg1, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8x16xf32>
+ %2 = xegpu.load_nd %1[%x, 0] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32> -> vector<8x16xf32>
+ gpu.return
+}
+
// CHECK: func @simt_load_nd_8(%[[arg0:.*]]: memref<24x32xf32>) {
gpu.func @simt_load_nd_8(%src: memref<24x32xf32>) {
// CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16x8xf32>
More information about the Mlir-commits
mailing list