[Mlir-commits] [mlir] [MLIR][XeGPU] Remove offsets from create_nd_tdesc & remove update_nd_offset, move offsets to load/store/prefetch ops (PR #193330)
Jianhui Li
llvmlistbot at llvm.org
Thu Apr 23 14:46:55 PDT 2026
================
@@ -1,963 +0,0 @@
-// RUN: mlir-opt --xegpu-wg-to-sg-distribute -split-input-file %s | FileCheck %s
-gpu.module @test_distribution {
- // CHECK-LABEL: create_nd_tdesc_no_offset
- // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
- gpu.func @create_nd_tdesc_no_offset(%src: memref<256x128xf32>) {
- // CHECK: xegpu.create_nd_tdesc %[[ARG_0]] : memref<256x128xf32>
- // CHECK-SAME: -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32>
- -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-
- // CHECK-LABEL: create_nd_tdesc_with_ptr
- // CHECK-SAME: %[[ARG_0:.*]]: ui64
- gpu.func @create_nd_tdesc_with_ptr(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
- // CHECK: xegpu.create_nd_tdesc %[[ARG_0]], shape : [{{.*}}, {{.*}}], strides : [{{.*}}, {{.*}}] : ui64
- // CHECK-SAME: -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %c1 = arith.constant 1 : index
- %tdesc = xegpu.create_nd_tdesc %src, shape:[%h, %w], strides: [%w, %c1] : ui64
- -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
- }
-
- // CHECK-LABEL: load_nd_tdesc_with_offset
- gpu.func @load_nd_tdesc_with_offset(%src: memref<256x128xf32>) {
- //CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- //CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
- //CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
- //CHECK-DAG: %[[SGIDX:.*]] = arith.remui %[[SGID]], %[[C4]]
- //CHECK-DAG: %[[SGIDY_TMP:.*]] = arith.divui %[[SGID]], %[[C4]]
- //CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
- //CHECK-DAG: %[[SGIDY:.*]] = arith.remui %[[SGIDY_TMP]], %[[C8]]
- //CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index
- //CHECK-DAG: %[[L_OFF_Y:.*]] = arith.muli %[[SGIDY]], %[[C32]] : index
- //CHECK-DAG: %[[L_OFF_X:.*]] = arith.muli %[[SGIDX]], %[[C32_1:.*]] : index
- //CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
- //CHECK-DAG: %[[OFF_Y:.*]] = arith.remui %[[L_OFF_Y]], %[[C256]] : index
- //CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
- //CHECK-DAG: %[[OFF_X:.*]] = arith.remui %[[L_OFF_X]], %[[C128]] : index
- //CHECK-DAG: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][{{%.*}}, {{%.*}}] <{layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}> : !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<32x32xf32>
- %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32>
- -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- %load = xegpu.load_nd %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
- : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- -> vector<256x128xf32>
- gpu.return
- }
-
- // CHECK-LABEL: store_nd_with_offsets
- // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
- gpu.func @store_nd_with_offsets(%src: memref<256x128xf32>) {
- //CHECK: xegpu.store_nd %{{.*}}, {{%.*}}[{{%.*}}, {{%.*}}] <{layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}> : vector<32x32xf32>, !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
- %tdesc = xegpu.create_nd_tdesc %src: memref<256x128xf32>
- -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- %load = xegpu.load_nd %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
- : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- -> vector<256x128xf32>
- xegpu.store_nd %load, %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
- : vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
- gpu.return
-}
-
- // CHECK-LABEL: prefetch_nd_tdesc_with_offset
- // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
- gpu.func @prefetch_nd_tdesc_with_offset(%src: memref<256x128xf32>) {
----------------
Jianhui-Li wrote:
I'm not sure about this one either - why was it removed?
https://github.com/llvm/llvm-project/pull/193330
More information about the Mlir-commits
mailing list