[Mlir-commits] [mlir] [MLIR][XeGPU] Remove offsets from create_nd_tdesc & remove update_nd_offset, move offsets to load/store/prefetch ops (PR #193330)

Thu Apr 23 14:46:55 PDT 2026

================
@@ -1,963 +0,0 @@
-// RUN: mlir-opt --xegpu-wg-to-sg-distribute -split-input-file %s | FileCheck %s
-gpu.module @test_distribution {
-  // CHECK-LABEL: create_nd_tdesc_no_offset
-  // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
-  gpu.func @create_nd_tdesc_no_offset(%src: memref<256x128xf32>) {
-    // CHECK: xegpu.create_nd_tdesc %[[ARG_0]] : memref<256x128xf32>
-    // CHECK-SAME: -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
-    %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32>
-        -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-      gpu.return
-  }
-
-  // CHECK-LABEL: create_nd_tdesc_with_ptr
-  // CHECK-SAME: %[[ARG_0:.*]]: ui64
-  gpu.func @create_nd_tdesc_with_ptr(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
-    // CHECK: xegpu.create_nd_tdesc %[[ARG_0]], shape : [{{.*}}, {{.*}}], strides : [{{.*}}, {{.*}}] : ui64
-    // CHECK-SAME: -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
-    %c1 = arith.constant 1 : index
-    %tdesc = xegpu.create_nd_tdesc %src, shape:[%h, %w], strides: [%w, %c1] : ui64
-        -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-      gpu.return
-  }
-
-  // CHECK-LABEL: load_nd_tdesc_with_offset
-  gpu.func @load_nd_tdesc_with_offset(%src: memref<256x128xf32>) {
-    //CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
-    //CHECK-DAG: %[[SGID:.*]] = gpu.subgroup_id : index
-    //CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
-    //CHECK-DAG: %[[SGIDX:.*]] = arith.remui %[[SGID]], %[[C4]]
-    //CHECK-DAG: %[[SGIDY_TMP:.*]] = arith.divui %[[SGID]], %[[C4]]
-    //CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-    //CHECK-DAG: %[[SGIDY:.*]] = arith.remui %[[SGIDY_TMP]], %[[C8]]
-    //CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index
-    //CHECK-DAG: %[[L_OFF_Y:.*]] = arith.muli %[[SGIDY]], %[[C32]] : index
-    //CHECK-DAG: %[[L_OFF_X:.*]] = arith.muli %[[SGIDX]], %[[C32_1:.*]] : index
-    //CHECK-DAG: %[[C256:.*]] = arith.constant 256 : index
-    //CHECK-DAG: %[[OFF_Y:.*]] = arith.remui %[[L_OFF_Y]], %[[C256]] : index
-    //CHECK-DAG: %[[C128:.*]] = arith.constant 128 : index
-    //CHECK-DAG: %[[OFF_X:.*]] = arith.remui %[[L_OFF_X]], %[[C128]] : index
-    //CHECK-DAG: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][{{%.*}}, {{%.*}}] <{layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}> : !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<32x32xf32>
-    %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32>
-      -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-    %load =  xegpu.load_nd %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
-      : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-      -> vector<256x128xf32>
-    gpu.return
-  }
-
-  // CHECK-LABEL: store_nd_with_offsets
-  // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
-  gpu.func @store_nd_with_offsets(%src: memref<256x128xf32>) {
-    //CHECK: xegpu.store_nd %{{.*}}, {{%.*}}[{{%.*}}, {{%.*}}] <{layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}>  : vector<32x32xf32>, !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
-    %tdesc = xegpu.create_nd_tdesc %src: memref<256x128xf32>
-      -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-    %load =  xegpu.load_nd %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
-      : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-      -> vector<256x128xf32>
-    xegpu.store_nd %load, %tdesc[0, 0] {layout = #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>}
-      : vector<256x128xf32>, !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-    gpu.return
-}
-
-  // CHECK-LABEL: prefetch_nd_tdesc_with_offset
-  // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
-  gpu.func @prefetch_nd_tdesc_with_offset(%src: memref<256x128xf32>) {
----------------
Jianhui-Li wrote:

I'm not sure about this one either - why was it removed?

https://github.com/llvm/llvm-project/pull/193330