[Mlir-commits] [mlir] [mlir][NVGPU] Fix double spaces in tests after ODS printer fix. NFC. (PR #185327)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Mar 8 15:30:28 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-gpu
Author: Jakub Kuderski (kuhar)
<details>
<summary>Changes</summary>
Follow-up to #<!-- -->184253. Update tests that checked for the old double-space output of GPU and NVVM ops using GPU_DimensionAttr and SetMaxRegisterActionAttr.
---
Full diff: https://github.com/llvm/llvm-project/pull/185327.diff
8 Files Affected:
- (modified) mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir (+1-1)
- (modified) mlir/test/Dialect/NVGPU/canonicalization.mlir (+2-2)
- (modified) mlir/test/Dialect/NVGPU/tmaload-transform.mlir (+1-1)
- (modified) mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir (+1-1)
- (modified) mlir/test/Examples/NVGPU/Ch0.py (+1-1)
- (modified) mlir/test/Examples/NVGPU/Ch3.py (+1-1)
- (modified) mlir/test/Examples/NVGPU/Ch4.py (+12-12)
- (modified) mlir/test/Examples/NVGPU/Ch5.py (+11-11)
``````````diff
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 0eb44789fe31d..50bea5a85022e 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -631,7 +631,7 @@ func.func @mbarrier_txcount_pred() {
%mine = arith.constant 1 : index
// CHECK: %[[c0:.+]] = arith.constant 0 : index
// CHECK: %[[mid:.+]] = builtin.unrealized_conversion_cast %[[c0]] : index to i64
- // CHECK: %[[S2:.+]] = gpu.thread_id  x
+ // CHECK: %[[S2:.+]] = gpu.thread_id x
// CHECK: %[[P:.+]] = arith.cmpi eq, %[[S2]], %[[c0]] : index
%c0 = arith.constant 0 : index
%tidx = gpu.thread_id x
diff --git a/mlir/test/Dialect/NVGPU/canonicalization.mlir b/mlir/test/Dialect/NVGPU/canonicalization.mlir
index a7fbfd8067395..7f33a79cb102f 100644
--- a/mlir/test/Dialect/NVGPU/canonicalization.mlir
+++ b/mlir/test/Dialect/NVGPU/canonicalization.mlir
@@ -12,13 +12,13 @@ gpu.module @main_kernel {
}
{
// CHECK: %[[c0:.+]] = arith.constant 0 : index
- // CHECK: %[[S0:.+]] = gpu.thread_id  x
+ // CHECK: %[[S0:.+]] = gpu.thread_id x
// CHECK: %[[S1:.+]] = arith.cmpi eq, %[[S0]], %[[c0]] : index
// CHECK: %[[S2:.+]] = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
// CHECK: %[[S3:.+]] = memref.view %[[S2]][%[[c0]]][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<128x32xf32, #gpu.address_space<workgroup>>
// CHECK: nvgpu.tma.async.store %[[S3]] to %[[arg0]][%[[c0]], %[[c0]]], predicate = %[[S1]] : memref<128x32xf32, #gpu.address_space<workgroup>> -> <tensor = memref<128x32xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>
%c0 = arith.constant 0 : index
- %0 = gpu.thread_id  x
+ %0 = gpu.thread_id x
%1 = arith.cmpi eq, %0, %c0 : index
%2 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
%view = memref.view %2[%c0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<128x32xf32, #gpu.address_space<workgroup>>
diff --git a/mlir/test/Dialect/NVGPU/tmaload-transform.mlir b/mlir/test/Dialect/NVGPU/tmaload-transform.mlir
index 40acd82cd0558..901f7732797d1 100644
--- a/mlir/test/Dialect/NVGPU/tmaload-transform.mlir
+++ b/mlir/test/Dialect/NVGPU/tmaload-transform.mlir
@@ -38,7 +38,7 @@ func.func @main() {
// CHECK: gpu.barrier
//
// CHECK: %[[c0:.*]] = arith.constant 0 : index
- // CHECK: %[[TIDX:.*]] = gpu.thread_id  x
+ // CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[TIDX]], %[[c0]] : index
//
// CHECK: scf.if %[[CMP]] {
diff --git a/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir b/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir
index bbe27fe1b99d9..c6aa2039a9511 100644
--- a/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir
+++ b/mlir/test/Dialect/NVGPU/transform-matmul-to-nvvm.mlir
@@ -13,7 +13,7 @@ func.func @matmul_16x8x4xf32_global(
// CHECK-SAME: %[[VAL_1:.*]]: memref<4x8xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: memref<16x8xf32>) {
-// CHECK: %[[TIDX:.*]] = gpu.thread_id  x
+// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[VAL_4:.*]] = affine.apply #[[$div4]]()[%[[TIDX]]]
// CHECK: %[[VAL_5:.*]] = affine.apply #[[$mod4]]()[%[[TIDX]]]
// CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_0]][%[[VAL_4]], %[[VAL_5]]] : memref<16x4xf32>
diff --git a/mlir/test/Examples/NVGPU/Ch0.py b/mlir/test/Examples/NVGPU/Ch0.py
index e09720a0f3b75..4f1743bb5f17f 100644
--- a/mlir/test/Examples/NVGPU/Ch0.py
+++ b/mlir/test/Examples/NVGPU/Ch0.py
@@ -61,7 +61,7 @@ def kernel():
# DUMPIR: %[[C1_2:.*]] = arith.constant 1 : index
# DUMPIR: %[[C1_3:.*]] = arith.constant 1 : index
# DUMPIR: gpu.launch blocks(%arg1, %arg2, %arg3) in (%arg7 = %[[C1]], %arg8 = %[[C1_0]], %arg9 = %[[C1_1]]) threads(%arg4, %arg5, %arg6) in (%arg10 = %[[C4]], %arg11 = %[[C1_2]], %arg12 = %[[C1_3]]) dynamic_shared_memory_size %[[C0_I32]] {
-# DUMPIR: %[[TIDX:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TIDX:.*]] = gpu.thread_id x
# DUMPIR: %[[MYVAL:.*]] = arith.addi %arg0, %[[TIDX]] : index
# DUMPIR: gpu.printf "GPU thread %llu has %llu\0A", %[[TIDX]], %[[MYVAL]] : index, index
# DUMPIR: gpu.terminator
diff --git a/mlir/test/Examples/NVGPU/Ch3.py b/mlir/test/Examples/NVGPU/Ch3.py
index fe11575416866..815e16dc68553 100644
--- a/mlir/test/Examples/NVGPU/Ch3.py
+++ b/mlir/test/Examples/NVGPU/Ch3.py
@@ -143,7 +143,7 @@ def gemm_tma_kernel():
# DUMPIR: %[[C64_5:.*]] = arith.constant 64 : index
# DUMPIR: %[[C64_6:.*]] = arith.constant 64 : index
# DUMPIR: %[[TMA1:.*]] = nvgpu.tma.create.descriptor %[[CAST1]] box[%[[C64_5]], %[[C64_6]]] : memref<*xf16> -> <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
-# DUMPIR: %[[THREADID:.*]] = gpu.thread_id  x
+# DUMPIR: %[[THREADID:.*]] = gpu.thread_id x
# DUMPIR: %[[MB:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>>
# DUMPIR: %[[C0:.*]] = arith.constant 0 : index
# DUMPIR: %[[EQ:.*]] = arith.cmpi eq, %[[THREADID]], %[[C0]] : index
diff --git a/mlir/test/Examples/NVGPU/Ch4.py b/mlir/test/Examples/NVGPU/Ch4.py
index dffafda7f21c9..c66259d141336 100644
--- a/mlir/test/Examples/NVGPU/Ch4.py
+++ b/mlir/test/Examples/NVGPU/Ch4.py
@@ -342,14 +342,14 @@ def gemm_multistage_kernel():
# DUMPIR: %[[C6:.*]] = arith.constant 6 : index
# DUMPIR: %[[C1_PROLOGUE:.*]] = arith.constant 1 : index
# DUMPIR: scf.for %arg15 = %[[C0_PROLOGUE]] to %[[C6]] step %[[C1_PROLOGUE]] {
-# DUMPIR: %[[BID_X_P:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_P:.*]] = gpu.block_id  y
+# DUMPIR: %[[BID_X_P:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_P:.*]] = gpu.block_id y
# DUMPIR: %[[C128_P1:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMX_P:.*]] = arith.muli %[[BID_X_P]], %[[C128_P1]] : index
# DUMPIR: %[[C128_P2:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMY_P:.*]] = arith.muli %[[BID_Y_P]], %[[C128_P2]] : index
-# DUMPIR: %{{.*}} = gpu.thread_id  x
-# DUMPIR: %[[TID_X_P:.*]] = gpu.thread_id  x
+# DUMPIR: %{{.*}} = gpu.thread_id x
+# DUMPIR: %[[TID_X_P:.*]] = gpu.thread_id x
# DUMPIR: %[[C0_P:.*]] = arith.constant 0 : index
# DUMPIR: %[[PRED_P:.*]] = arith.cmpi eq, %[[TID_X_P]], %[[C0_P]] : index
# DUMPIR: %[[C16384_P1:.*]] = arith.constant 16384 : index
@@ -376,7 +376,7 @@ def gemm_multistage_kernel():
# DUMPIR: %[[DIMY_P_OFF:.*]] = arith.addi %[[DIMY_P]], %[[C64_OFF]] : index
# DUMPIR: nvgpu.tma.async.load %{{.*}}[%[[DIMY_P_OFF]], %[[K_COORD_P]]], %{{.*}}[%arg15] to %[[VIEW_B2_P]], predicate = %[[PRED_P]] : <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7> -> memref<64x64xf16, #gpu.address_space<workgroup>>
# DUMPIR: }
-# DUMPIR: %[[TID_X_LOOP:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_LOOP:.*]] = gpu.thread_id x
# DUMPIR: %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> <fragmented = vector<128x128xf32>>
# DUMPIR: %[[FALSE_LOOP:.*]] = arith.constant false
# DUMPIR: %[[C0_LOOP:.*]] = arith.constant 0 : index
@@ -409,14 +409,14 @@ def gemm_multistage_kernel():
# DUMPIR: %[[STAGE_NEXT_L:.*]] = arith.addi %arg15, %[[C6_STAGE]] : index
# DUMPIR: %[[C7_MOD:.*]] = arith.constant 7 : index
# DUMPIR: %[[STAGE_LOAD:.*]] = arith.remui %[[STAGE_NEXT_L]], %[[C7_MOD]] : index
-# DUMPIR: %[[BID_X_L:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_L:.*]] = gpu.block_id  y
+# DUMPIR: %[[BID_X_L:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_L:.*]] = gpu.block_id y
# DUMPIR: %[[C128_L1:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMX_L:.*]] = arith.muli %[[BID_X_L]], %[[C128_L1]] : index
# DUMPIR: %[[C128_L2:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMY_L:.*]] = arith.muli %[[BID_Y_L]], %[[C128_L2]] : index
-# DUMPIR: %[[TID_X_L1:.*]] = gpu.thread_id  x
-# DUMPIR: %[[TID_X_L2:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_L1:.*]] = gpu.thread_id x
+# DUMPIR: %[[TID_X_L2:.*]] = gpu.thread_id x
# DUMPIR: %[[C16384_LA1:.*]] = arith.constant 16384 : index
# DUMPIR: %[[OFF_A_LOAD:.*]] = arith.muli %[[STAGE_LOAD]], %[[C16384_LA1]] : index
# DUMPIR: %[[C16384_LA2:.*]] = arith.constant 16384 : index
@@ -448,9 +448,9 @@ def gemm_multistage_kernel():
# DUMPIR: scf.yield %[[ACC_L]], %[[NEW_PARITY]] : !nvgpu.warpgroup.accumulator<fragmented = vector<128x128xf32>>, i1
# DUMPIR: }
# DUMPIR: nvvm.wgmma.wait.group.sync.aligned 0
-# DUMPIR: %[[TID_X_EPI:.*]] = gpu.thread_id  x
-# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id  y
+# DUMPIR: %[[TID_X_EPI:.*]] = gpu.thread_id x
+# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y
# DUMPIR: %[[C128_EPI1:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIMX_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI1]] : index
# DUMPIR: %[[C128_EPI2:.*]] = arith.constant 128 : index
diff --git a/mlir/test/Examples/NVGPU/Ch5.py b/mlir/test/Examples/NVGPU/Ch5.py
index b725e50d8f44b..4f06f97142620 100644
--- a/mlir/test/Examples/NVGPU/Ch5.py
+++ b/mlir/test/Examples/NVGPU/Ch5.py
@@ -324,7 +324,7 @@ def gemm_warp_specialized_kernel():
# CHECK-NOT: Mismatched elements
# CHECK: PASS
-# DUMPIR: %[[TID_X:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X:.*]] = gpu.thread_id x
# DUMPIR: %[[C128:.*]] = arith.constant 128 : index
# DUMPIR: %[[REM1:.*]] = arith.remui %[[TID_X]], %[[C128]] : index
# DUMPIR: %[[C0:.*]] = arith.constant 0 : index
@@ -333,7 +333,7 @@ def gemm_warp_specialized_kernel():
# DUMPIR: %[[DIV1:.*]] = arith.divui %[[TID_X]], %[[C128_1]] : index
# DUMPIR: %[[C1:.*]] = arith.constant 1 : index
# DUMPIR: %[[IS_PRODUCER:.*]] = arith.cmpi eq, %[[DIV1]], %[[C1]] : index
-# DUMPIR: %[[TID_X_2:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_2:.*]] = gpu.thread_id x
# DUMPIR: %[[C128_2:.*]] = arith.constant 128 : index
# DUMPIR: %[[REM2:.*]] = arith.remui %[[TID_X_2]], %[[C128_2]] : index
# DUMPIR: %[[C0_2:.*]] = arith.constant 0 : index
@@ -342,7 +342,7 @@ def gemm_warp_specialized_kernel():
# DUMPIR: %[[DIV2:.*]] = arith.divui %[[TID_X_2]], %[[C128_3]] : index
# DUMPIR: %[[C0_3:.*]] = arith.constant 0 : index
# DUMPIR: %[[IS_CONSUMER:.*]] = arith.cmpi eq, %[[DIV2]], %[[C0_3]] : index
-# DUMPIR: %[[TID_X_3:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_X_3:.*]] = gpu.thread_id x
# DUMPIR: %[[MBAR_MMA:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7>
# DUMPIR: %[[MBAR_TMA:.*]] = nvgpu.mbarrier.create -> <memorySpace = #gpu.address_space<workgroup>, num_barriers = 7>
# DUMPIR: %[[C0_4:.*]] = arith.constant 0 : index
@@ -361,7 +361,7 @@ def gemm_warp_specialized_kernel():
# DUMPIR: nvgpu.tma.prefetch.descriptor %{{.*}} : <tensor = memref<64x64xf16, 3>, swizzle = swizzle_128b, l2promo = none, oob = zero, interleave = none>
# DUMPIR: }
# DUMPIR: scf.if %[[IS_PRODUCER]] {
-# DUMPIR: nvvm.setmaxregister  decrease 40
+# DUMPIR: nvvm.setmaxregister decrease 40
# DUMPIR: %[[TRUE:.*]] = arith.constant true
# DUMPIR: %[[C0_PROD:.*]] = arith.constant 0 : index
# DUMPIR: %[[C16:.*]] = arith.constant 16 : index
@@ -376,13 +376,13 @@ def gemm_warp_specialized_kernel():
# DUMPIR: %[[TRUE_2:.*]] = arith.constant true
# DUMPIR: %[[FLIP:.*]] = arith.xori %arg16, %[[TRUE_2]] : i1
# DUMPIR: %[[PHASE:.*]] = arith.select %[[IS_LAST]], %[[FLIP]], %arg16 : i1
-# DUMPIR: %[[BID_X:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y:.*]] = gpu.block_id  y
+# DUMPIR: %[[BID_X:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y:.*]] = gpu.block_id y
# DUMPIR: %[[C128_TILE:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIM_X:.*]] = arith.muli %[[BID_X]], %[[C128_TILE]] : index
# DUMPIR: %[[C128_TILE_2:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIM_Y:.*]] = arith.muli %[[BID_Y]], %[[C128_TILE_2]] : index
-# DUMPIR: %[[TID_PROD:.*]] = gpu.thread_id  x
+# DUMPIR: %[[TID_PROD:.*]] = gpu.thread_id x
# DUMPIR: %[[C16384:.*]] = arith.constant 16384 : index
# DUMPIR: %[[OFF_A:.*]] = arith.muli %[[SLOT]], %[[C16384]] : index
# DUMPIR: %[[C16384_2:.*]] = arith.constant 16384 : index
@@ -414,7 +414,7 @@ def gemm_warp_specialized_kernel():
# DUMPIR: }
# DUMPIR: }
# DUMPIR: scf.if %[[IS_CONSUMER]] {
-# DUMPIR: nvvm.setmaxregister  increase 232
+# DUMPIR: nvvm.setmaxregister increase 232
# DUMPIR: %[[FALSE:.*]] = arith.constant false
# DUMPIR: %[[ACC_INIT:.*]] = nvgpu.warpgroup.mma.init.accumulator -> <fragmented = vector<128x128xf32>>
# DUMPIR: %[[C0_CONS:.*]] = arith.constant 0 : index
@@ -456,9 +456,9 @@ def gemm_warp_specialized_kernel():
# DUMPIR: scf.yield %[[ACC]], %[[PHASE_CONS]] : !nvgpu.warpgroup.accumulator<fragmented = vector<128x128xf32>>, i1
# DUMPIR: }
# DUMPIR: nvvm.wgmma.wait.group.sync.aligned 0
-# DUMPIR: %[[TID_EPI:.*]] = gpu.thread_id  x
-# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id  x
-# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id  y
+# DUMPIR: %[[TID_EPI:.*]] = gpu.thread_id x
+# DUMPIR: %[[BID_X_EPI:.*]] = gpu.block_id x
+# DUMPIR: %[[BID_Y_EPI:.*]] = gpu.block_id y
# DUMPIR: %[[C128_EPI:.*]] = arith.constant 128 : index
# DUMPIR: %[[DIM_X_EPI:.*]] = arith.muli %[[BID_X_EPI]], %[[C128_EPI]] : index
# DUMPIR: %[[C128_EPI_2:.*]] = arith.constant 128 : index
``````````
</details>
https://github.com/llvm/llvm-project/pull/185327
More information about the Mlir-commits
mailing list