[Mlir-commits] [mlir] [mlir][XeGPU] Fix double spaces in tests after ODS printer fix. NFC. (PR #185324)

Sun Mar 8 15:27:07 PDT 2026

llvmbot wrote:




@llvm/pr-subscribers-mlir-gpu

Author: Jakub Kuderski (kuhar)

<details>
<summary>Changes</summary>

Follow-up to #184253. Update tests that checked for the old double-space output of gpu.block_id using GPU_DimensionAttr.

---
Full diff: https://github.com/llvm/llvm-project/pull/185324.diff


7 Files Affected:

- (modified) mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir (+4-4) 
- (modified) mlir/test/Dialect/XeGPU/sg-to-wi-experimental.mlir (+12-12) 
- (modified) mlir/test/Dialect/XeGPU/subgroup-distribute.mlir (+3-3) 
- (modified) mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir (+1-1) 
- (modified) mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir (+4-4) 
- (modified) mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir (+2-2) 
- (modified) mlir/test/Integration/Dialect/XeGPU/LANE/no-xegpu-ops.mlir (+3-3) 


``````````diff

diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
index ffbe95b2a6f84..39fd815b1b380 100644
--- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
+++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir
@@ -173,8 +173,8 @@ gpu.module @test {
     %c128 = arith.constant 128 : index
     %c8192 = arith.constant 8192 : index
     %c0 = arith.constant 0 : index
-    %block_id_x = gpu.block_id  x
-    %block_id_y = gpu.block_id  y
+    %block_id_x = gpu.block_id x
+    %block_id_y = gpu.block_id y
     %0 = affine.apply affine_map<()[s0] -> (s0 * 128)>()[%block_id_x]
     %1 = affine.apply affine_map<()[s0] -> (s0 * 128)>()[%block_id_y]
     // CHECK: %2 = scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}}) -> (vector<128x128xf32>) {
@@ -222,8 +222,8 @@ gpu.module @test {
     %c16 = arith.constant 16 : index
     %c8192 = arith.constant 8192 : index
     %c0 = arith.constant 0 : index
-    %block_id_x = gpu.block_id  x
-    %block_id_y = gpu.block_id  y
+    %block_id_x = gpu.block_id x
+    %block_id_y = gpu.block_id y
     %4 = xegpu.create_nd_tdesc %arg0 : memref<2048x8192xf16> -> !xegpu.tensor_desc<32x16xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
     %5 = xegpu.load_nd %4[%block_id_x, %c0]  : !xegpu.tensor_desc<32x16xf16, #xegpu.block_tdesc_attr<boundary_check = false>> -> vector<32x16xf16>
     %6 = xegpu.create_nd_tdesc %arg1 : memref<8192x4096xf16> -> !xegpu.tensor_desc<16x64xf16, #xegpu.block_tdesc_attr<boundary_check = false>>
diff --git a/mlir/test/Dialect/XeGPU/sg-to-wi-experimental.mlir b/mlir/test/Dialect/XeGPU/sg-to-wi-experimental.mlir
index 9172cd3018b71..4e9cc20eb3d96 100644
--- a/mlir/test/Dialect/XeGPU/sg-to-wi-experimental.mlir
+++ b/mlir/test/Dialect/XeGPU/sg-to-wi-experimental.mlir
@@ -6,8 +6,8 @@
 // CHECK-DAG  : %[[C16:.*]] = arith.constant 16 : index
 // CHECK-DAG  : %[[C8:.*]] = arith.constant 8 : index
 // CHECK-DAG  : %[[C1024:.*]] = arith.constant 1024 : index
-// CHECK-DAG  : %[[BID_X:.*]] = gpu.block_id  x
-// CHECK-DAG  : %[[BID_Y:.*]] = gpu.block_id  y
+// CHECK-DAG  : %[[BID_X:.*]] = gpu.block_id x
+// CHECK-DAG  : %[[BID_Y:.*]] = gpu.block_id y
 // CHECK-DAG  : %[[MUL_X:.*]] = arith.muli %[[BID_X]], %[[C8]] : index
 // CHECK-DAG  : %[[MUL_Y:.*]] = arith.muli %[[BID_Y]], %[[C16]] : index
 // CHECK      : %[[TD_C:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32>
@@ -32,8 +32,8 @@ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %ar
   %c16 = arith.constant 16 : index
   %c8 = arith.constant 8 : index
   %c1024 = arith.constant 1024 : index
-  %block_id_x = gpu.block_id  x
-  %block_id_y = gpu.block_id  y
+  %block_id_x = gpu.block_id x
+  %block_id_y = gpu.block_id y
   %0 = arith.muli %block_id_x, %c8 : index
   %1 = arith.muli %block_id_y, %c16 : index
   %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> ->
@@ -77,8 +77,8 @@ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %ar
 // CHECK-DAG  : %[[C8:.*]] = arith.constant 8 : index
 // CHECK-DAG  : %[[C1024:.*]] = arith.constant 1024 : index
 // CHECK      : %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<8x1xbf16>
-// CHECK-DAG  : %[[BID_X:.*]] = gpu.block_id  x
-// CHECK-DAG  : %[[BID_Y:.*]] = gpu.block_id  y
+// CHECK-DAG  : %[[BID_X:.*]] = gpu.block_id x
+// CHECK-DAG  : %[[BID_Y:.*]] = gpu.block_id y
 // CHECK-DAG  : %[[MUL_X:.*]] = arith.muli %[[BID_X]], %[[C8]] : index
 // CHECK-DAG  : %[[MUL_Y:.*]] = arith.muli %[[BID_Y]], %[[C16]] : index
 // CHECK      : %[[TD_C:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32>
@@ -106,8 +106,8 @@ gpu.func @gemm_with_preop(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024
   %c8 = arith.constant 8 : index
   %c1024 = arith.constant 1024 : index
   %cst = arith.constant  {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} dense<1.0> : vector<8x16xbf16>
-  %block_id_x = gpu.block_id  x
-  %block_id_y = gpu.block_id  y
+  %block_id_x = gpu.block_id x
+  %block_id_y = gpu.block_id y
   %0 = arith.muli %block_id_x, %c8 : index
   %1 = arith.muli %block_id_y, %c16 : index
   %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> ->
@@ -151,8 +151,8 @@ gpu.func @gemm_with_preop(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024
 // CHECK-DAG  : %[[C16:.*]] = arith.constant 16 : index
 // CHECK-DAG  : %[[C8:.*]] = arith.constant 8 : index
 // CHECK-DAG  : %[[C1024:.*]] = arith.constant 1024 : index
-// CHECK-DAG  : %[[BID_X:.*]] = gpu.block_id  x
-// CHECK-DAG  : %[[BID_Y:.*]] = gpu.block_id  y
+// CHECK-DAG  : %[[BID_X:.*]] = gpu.block_id x
+// CHECK-DAG  : %[[BID_Y:.*]] = gpu.block_id y
 // CHECK-DAG  : %[[MUL_X:.*]] = arith.muli %[[BID_X]], %[[C8]] : index
 // CHECK-DAG  : %[[MUL_Y:.*]] = arith.muli %[[BID_Y]], %[[C16]] : index
 // CHECK      : %[[TD_C:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32>
@@ -176,8 +176,8 @@ gpu.func @gemm_with_postop(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x102
   %c16 = arith.constant 16 : index
   %c8 = arith.constant 8 : index
   %c1024 = arith.constant 1024 : index
-  %block_id_x = gpu.block_id  x
-  %block_id_y = gpu.block_id  y
+  %block_id_x = gpu.block_id x
+  %block_id_y = gpu.block_id y
   %0 = arith.muli %block_id_x, %c8 : index
   %1 = arith.muli %block_id_y, %c16 : index
   %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> ->
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index b8c92ec8d6f87..c3cdc79d9f70e 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -79,8 +79,8 @@ gpu.func @gemm(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>, %ar
   %c16 = arith.constant 16 : index
   %c8 = arith.constant 8 : index
   %c1024 = arith.constant 1024 : index
-  %block_id_x = gpu.block_id  x
-  %block_id_y = gpu.block_id  y
+  %block_id_x = gpu.block_id x
+  %block_id_y = gpu.block_id y
   %0 = arith.muli %block_id_x, %c8 : index
   %1 = arith.muli %block_id_y, %c16 : index
   %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> ->
@@ -385,7 +385,7 @@ gpu.module @xevm_module{
 gpu.module @xevm_module{
    gpu.func  @vector_shape_cast_scalar_to_vector(%arg0: memref<16xf16>, %arg1: memref<16x16xf16>) {
     %c0 = arith.constant 0 : index
-    %9 = gpu.block_id  x
+    %9 = gpu.block_id x
     %10 = arith.index_cast %9 : index to i16
     %11 = arith.bitcast %10 : i16 to f16
     // CHECK: vector.broadcast {{.*}} : f16 to vector<16xf16>
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir
index 29385406007ba..28865c2ec50bb 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops-rr.mlir
@@ -154,7 +154,7 @@ gpu.module @test_distribution {
   // CHECK: %[[BCAST:.*]] = vector.broadcast %[[CAST]] : vector<8x1xf32> to vector<8x128xf32>
   gpu.func @distribute_shapecast_expandunitdims_broadcast(%arg0: memref<4096x128xf32>, %arg1: memref<4096x128xf32>) {
     %cst_0 = arith.constant {layout_result_0=#xegpu.slice<#xegpu.layout<sg_layout = [32, 1], sg_data = [8, 128], inst_data = [8, 16]>, dims = [1]>} dense<0xFF800000> : vector<256xf32>
-    %block_id_x = gpu.block_id  x
+    %block_id_x = gpu.block_id x
     %0 = xegpu.create_nd_tdesc %arg0 : memref<4096x128xf32> -> !xegpu.tensor_desc<256x128xf32, #xegpu.block_tdesc_attr<boundary_check = false>, #xegpu.layout<sg_layout = [32, 1], sg_data = [8, 128], inst_data = [8, 16]>>
     %1 = xegpu.load_nd %0[%block_id_x, 0] {layout = #xegpu.layout<sg_layout = [32, 1], sg_data = [8, 128], inst_data = [8, 16]>}  : !xegpu.tensor_desc<256x128xf32, #xegpu.block_tdesc_attr<boundary_check = false>, #xegpu.layout<sg_layout = [32, 1], sg_data = [8, 128], inst_data = [8, 16]>> -> vector<256x128xf32>
     %2 = vector.multi_reduction <maximumf>, %1, %cst_0 {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [32, 1], sg_data = [8, 128], inst_data = [8, 16]>, dims = [1]>} [1] : vector<256x128xf32> to vector<256xf32>
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
index 2b1655a7ac44f..9a7ea4d1c2f43 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
@@ -169,8 +169,8 @@ gpu.module @test_distribution {
     %c0 = arith.constant 0 : index
     %c128 = arith.constant 128 : index
     %c1024 = arith.constant 1024 : index
-    %block_id_x = gpu.block_id  x
-    %block_id_y = gpu.block_id  y
+    %block_id_x = gpu.block_id x
+    %block_id_y = gpu.block_id y
     %0 = arith.muli %block_id_x, %c128 : index
     %1 = arith.muli %block_id_y, %c128 : index
     %2 = xegpu.create_nd_tdesc %arg2 : memref<1024x1024xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [8, 8], sg_data = [16, 16]>>
@@ -832,8 +832,8 @@ gpu.module @test_distribution {
     %c4096 = arith.constant 4096 : index
     %c0 = arith.constant 0 : index
     %c256 = arith.constant 256 : index
-    %block_id_x = gpu.block_id  x
-    %block_id_y = gpu.block_id  y
+    %block_id_x = gpu.block_id x
+    %block_id_y = gpu.block_id y
     %0 = arith.muli %block_id_x, %c256 overflow<nsw> : index
     %1 = arith.muli %block_id_y, %c256 overflow<nsw> : index
     %2 = xegpu.create_nd_tdesc %arg0 : memref<4096x4096xf32> -> !xegpu.tensor_desc<256x256xf32, #xegpu.block_tdesc_attr<boundary_check = false>, #xegpu.layout<sg_layout = [8, 8], sg_data = [32, 32], inst_data = [8, 16]>>
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
index 767113a043557..61ca028dd3ea1 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg.mlir
@@ -178,8 +178,8 @@ gpu.module @test_1_1_assignment {
     %c0 = arith.constant 0 : index
     %c128 = arith.constant 128 : index
     %c1024 = arith.constant 1024 : index
-    %block_id_x = gpu.block_id  x
-    %block_id_y = gpu.block_id  y
+    %block_id_x = gpu.block_id x
+    %block_id_y = gpu.block_id y
     %0 = arith.muli %block_id_x, %c128 : index
     %1 = arith.muli %block_id_y, %c128 : index
     %2 = xegpu.create_nd_tdesc %arg2[%0, %1] : memref<1024x1024xf32> -> !xegpu.tensor_desc<128x128xf32, #xegpu.layout<sg_layout = [8, 8], sg_data = [16, 16]>>
diff --git a/mlir/test/Integration/Dialect/XeGPU/LANE/no-xegpu-ops.mlir b/mlir/test/Integration/Dialect/XeGPU/LANE/no-xegpu-ops.mlir
index 548e1864b1a05..cb2dce90fe668 100644
--- a/mlir/test/Integration/Dialect/XeGPU/LANE/no-xegpu-ops.mlir
+++ b/mlir/test/Integration/Dialect/XeGPU/LANE/no-xegpu-ops.mlir
@@ -36,9 +36,9 @@ module @add attributes {gpu.container_module} {
   }
   gpu.module @test_kernel {
     gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel {
-      %0 = gpu.block_id  x
-      %1 = gpu.block_id  y
-      %2 = gpu.block_id  z
+      %0 = gpu.block_id x
+      %1 = gpu.block_id y
+      %2 = gpu.block_id z
       %3 = memref.load %arg0[%0, %1, %2] : memref<2x2x2xf32>
       %4 = memref.load %arg1[%0, %1, %2] : memref<2x2x2xf32>
       %5 = arith.addf %3, %4 : f32

``````````

</details>


https://github.com/llvm/llvm-project/pull/185324