[Mlir-commits] [mlir] [MLIR][GPU] Fix gpu.printf (PR #121940)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Jan 7 06:21:29 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-gpu
@llvm/pr-subscribers-mlir
Author: Guray Ozen (grypp)
<details>
<summary>Changes</summary>
The following code fails with the error below; it appears to be a parser error.
```
test_printf.mlir:5:9: error: expected ':'
%135 = arith.cmpi slt, %arg1, %c127_i32 : i32
```
```
func.func @test(%133 : i32, %arg1: i32, %c127_i32:i32) {
%134 = llvm.bitcast %133 : i32 to f32
gpu.printf "]"
%135 = arith.cmpi slt, %arg1, %c127_i32 : i32
scf.if %135 {
gpu.printf ", "
}
....
}
```
This PR attempts to fix that by changing the assembly format of `printf`.
---
Full diff: https://github.com/llvm/llvm-project/pull/121940.diff
17 Files Affected:
- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUOps.td (+1-1)
- (modified) mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir (+1-1)
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir (+1-1)
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir (+1-1)
- (modified) mlir/test/Conversion/GPUToSPIRV/printf.mlir (+1-1)
- (modified) mlir/test/Dialect/GPU/indirect-device-func-call.mlir (+1-1)
- (modified) mlir/test/Dialect/GPU/ops.mlir (+15-2)
- (modified) mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir (+1-1)
- (added) mlir/test/Dialect/GPU/test_printf.mlir (+12)
- (modified) mlir/test/Integration/GPU/CUDA/assert.mlir (+2-2)
- (modified) mlir/test/Integration/GPU/CUDA/printf.mlir (+1-1)
- (modified) mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir (+1-1)
- (modified) mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir (+5-5)
- (modified) mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir (+3-3)
- (modified) mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir (+4-4)
- (modified) mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir (+2-2)
- (modified) mlir/test/Integration/GPU/ROCM/printf.mlir (+1-1)
``````````diff
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 42a017db300af6..3adfd5f4f2c436 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1055,7 +1055,7 @@ def GPU_PrintfOp : GPU_Op<"printf", [MemoryEffects<[MemWrite]>]>,
imposed by one's target platform.
}];
let assemblyFormat = [{
- $format attr-dict ($args^ `:` type($args))?
+ $format attr-dict (`,` $args^ `:` type($args))?
}];
}
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 318f0f78efa5b7..f52dd6c0d0ce30 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -633,7 +633,7 @@ gpu.module @test_module_29 {
// CHECK-NEXT: %[[EL1:.*]] = llvm.getelementptr %[[ALLOC]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, f64)>
// CHECK-NEXT: llvm.store %[[EXT]], %[[EL1]] : f64, !llvm.ptr
// CHECK-NEXT: llvm.call @vprintf(%[[FORMATSTART]], %[[ALLOC]]) : (!llvm.ptr, !llvm.ptr) -> i32
- gpu.printf "Hello: %d\n" %arg0, %arg1 : i32, f32
+ gpu.printf "Hello: %d\n", %arg0, %arg1 : i32, f32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
index 1b904fa142bad3..2dc6a5ab2a86ce 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir
@@ -36,7 +36,7 @@ gpu.module @test_module {
// CHECK-NEXT: %[[NARGS1:.*]] = llvm.mlir.constant(1 : i32) : i32
// CHECK-NEXT: %[[ARG0_64:.*]] = llvm.zext %[[ARG0]] : i32 to i64
// CHECK-NEXT: %{{.*}} = llvm.call @__ockl_printf_append_args(%[[DESC1]], %[[NARGS1]], %[[ARG0_64]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[CST0]], %[[ISLAST]]) : (i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) -> i64
- gpu.printf "Hello: %d\n" %arg0 : i32
+ gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
index 870f5c5016ecef..00d1d7d8526809 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir
@@ -9,7 +9,7 @@ gpu.module @test_module {
// CHECK: %[[IMM0:.*]] = llvm.mlir.addressof @[[$PRINT_GLOBAL]] : !llvm.ptr<4>
// CHECK-NEXT: %[[IMM2:.*]] = llvm.getelementptr %[[IMM0]][0, 0] : (!llvm.ptr<4>) -> !llvm.ptr<4>, !llvm.array<11 x i8>
// CHECK-NEXT: %{{.*}} = llvm.call @printf(%[[IMM2]], %[[ARG0]]) vararg(!llvm.func<i32 (ptr<4>, ...)>) : (!llvm.ptr<4>, i32) -> i32
- gpu.printf "Hello: %d\n" %arg0 : i32
+ gpu.printf "Hello: %d\n", %arg0 : i32
gpu.return
}
}
diff --git a/mlir/test/Conversion/GPUToSPIRV/printf.mlir b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
index bc091124ea4c6f..7fe9752b088dba 100644
--- a/mlir/test/Conversion/GPUToSPIRV/printf.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/printf.mlir
@@ -62,7 +62,7 @@ module attributes {
// CHECK: [[FMTSTR_ADDR:%.*]] = spirv.mlir.addressof [[PRINTMSG]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant>
// CHECK-NEXT: [[FMTSTR_PTR1:%.*]] = spirv.Bitcast [[FMTSTR_ADDR]] : !spirv.ptr<!spirv.array<[[ARRAYSIZE]] x i8>, UniformConstant> to !spirv.ptr<i8, UniformConstant>
// CHECK-NEXT: {{%.*}} = spirv.CL.printf [[FMTSTR_PTR1]] {{%.*}}, {{%.*}}, {{%.*}} : !spirv.ptr<i8, UniformConstant>, i32, f32, i32 -> i32
- gpu.printf "\nHello, world : %d %f \n Thread id: %d\n" %arg0, %arg1, %2: i32, f32, index
+ gpu.printf "\nHello, world : %d %f \n Thread id: %d\n", %arg0, %arg1, %2: i32, f32, index
// CHECK: spirv.Return
gpu.return
diff --git a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
index 91d7f1cd6c67d9..85805da3ac10e1 100644
--- a/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
+++ b/mlir/test/Dialect/GPU/indirect-device-func-call.mlir
@@ -6,7 +6,7 @@ gpu.module @kernels {
func.func @hello(%arg0 : f32) {
%tid_x = gpu.thread_id x
%csti8 = arith.constant 2 : i8
- gpu.printf "Hello from %lld, %d, %f\n" %tid_x, %csti8, %arg0 : index, i8, f32
+ gpu.printf "Hello from %lld, %d, %f\n", %tid_x, %csti8, %arg0 : index, i8, f32
return
}
// CHECK-LABEL: @hello_indirect
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index c0ff2044b76c40..99915c493ea465 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -229,9 +229,22 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: gpu.func @printf_test
// CHECK: (%[[ARG0:.*]]: i32)
- // CHECK: gpu.printf "Value: %d" %[[ARG0]] : i32
+ // CHECK: gpu.printf "Value: %d", %[[ARG0]] : i32
gpu.func @printf_test(%arg0 : i32) {
- gpu.printf "Value: %d" %arg0 : i32
+ gpu.printf "Value: %d", %arg0 : i32
+ gpu.return
+ }
+
+ // CHECK-LABEL: gpu.func @printf_empty
+ // CHECK: gpu.printf "]"
+ // CHECK: scf.if
+ // CHECK: gpu.printf ", "
+ gpu.func @printf_empty(%arg0 : i32) {
+ gpu.printf "]"
+ %1 = arith.cmpi slt, %arg0, %arg0 : i32
+ scf.if %1 {
+ gpu.printf ", "
+ }
gpu.return
}
diff --git a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
index 732f40c4333df2..f02b26dba97d59 100644
--- a/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
+++ b/mlir/test/Dialect/GPU/test-nvvm-pipeline.mlir
@@ -23,7 +23,7 @@ func.func @test_math(%arg0 : f32) {
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
// CHECK-NVVM: __nv_expf
%s1 = math.exp %arg0 : f32
- gpu.printf "%f" %s1 : f32
+ gpu.printf "%f", %s1 : f32
gpu.terminator
}
return
diff --git a/mlir/test/Dialect/GPU/test_printf.mlir b/mlir/test/Dialect/GPU/test_printf.mlir
new file mode 100644
index 00000000000000..2a332530355d48
--- /dev/null
+++ b/mlir/test/Dialect/GPU/test_printf.mlir
@@ -0,0 +1,12 @@
+func.func @gemm_no_scf_sm100_1cta(%133 : i32, %arg1: i32, %c127_i32:i32) {
+ %134 = llvm.bitcast %133 : i32 to f32
+ gpu.printf "]"
+ %135 = arith.cmpi slt, %arg1, %c127_i32 : i32
+ scf.if %135 {
+ gpu.printf ", "
+ }
+
+ %0 = gpu.thread_id x
+ gpu.printf "Hello from %d\n", %0 : index
+ func.return
+}
diff --git a/mlir/test/Integration/GPU/CUDA/assert.mlir b/mlir/test/Integration/GPU/CUDA/assert.mlir
index 06a9c1ca0d114b..3d6527fe59b2c1 100644
--- a/mlir/test/Integration/GPU/CUDA/assert.mlir
+++ b/mlir/test/Integration/GPU/CUDA/assert.mlir
@@ -16,10 +16,10 @@ gpu.module @kernels {
gpu.func @test_assert(%c0: i1, %c1: i1) kernel {
%0 = gpu.thread_id x
cf.assert %c1, "passing assertion"
- gpu.printf "thread %lld: print after passing assertion\n" %0 : index
+ gpu.printf "thread %lld: print after passing assertion\n", %0 : index
// Test callsite(callsite(name)) location.
cf.assert %c0, "failing assertion" loc(callsite(callsite("callee_func_name"("callee_file.cc":7:9) at "caller_file.cc":10:8) at "caller2_file.cc":11:12))
- gpu.printf "thread %lld: print after failing assertion\n" %0 : index
+ gpu.printf "thread %lld: print after failing assertion\n", %0 : index
gpu.return
}
}
diff --git a/mlir/test/Integration/GPU/CUDA/printf.mlir b/mlir/test/Integration/GPU/CUDA/printf.mlir
index 99ea1208e9c5e7..15b0bf02d911a5 100644
--- a/mlir/test/Integration/GPU/CUDA/printf.mlir
+++ b/mlir/test/Integration/GPU/CUDA/printf.mlir
@@ -14,7 +14,7 @@ module attributes {gpu.container_module} {
%0 = gpu.thread_id x
%csti8 = arith.constant 2 : i8
%cstf32 = arith.constant 3.0 : f32
- gpu.printf "Hello from %lld, %d, %f\n" %0, %csti8, %cstf32 : index, i8, f32
+ gpu.printf "Hello from %lld, %d, %f\n", %0, %csti8, %cstf32 : index, i8, f32
gpu.return
}
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
index c70c940564a264..a22a34b9393a3f 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir
@@ -43,7 +43,7 @@ module attributes {gpu.container_module} {
%cnd2 = arith.cmpi eq, %bidY, %c3 : index
scf.if %cnd1 {
scf.if %cnd2 {
- gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
+ gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n",
%cidX_i32,
%cidY_i32,
%cidZ_i32,
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
index b50772f8249fb7..95bde40deb48ee 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_128x64_swizzle128b.mlir
@@ -85,7 +85,7 @@ module @mymod {
// Step 7. First thread does TMA load
scf.if %10 {
- gpu.printf "[GPU] TMA SIZE %d\0A" %c8192 : index
+ gpu.printf "[GPU] TMA SIZE %d\0A", %c8192 : index
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c8192 : !barrierType
} else {
@@ -98,16 +98,16 @@ module @mymod {
// Step 9. Print loaded data in 128b swizzled
scf.if %10 {
- gpu.printf "===--- Matrix A ---=== %d \0A" %c-1_i32 : i32
+ gpu.printf "===--- Matrix A ---=== %d \0A", %c-1_i32 : i32
scf.for %arg12 = %c0 to %c128 step %c1 {
scf.for %arg13 = %c0 to %c64 step %c1 {
%15 = memref.load %7[%arg12, %arg13] : !shmemlhs
%16 = arith.extf %15 : f16 to f32
- gpu.printf "%.0f, " %16 : f32
+ gpu.printf "%.0f, ", %16 : f32
}
- gpu.printf "%d\0A" %c-1_i32 : i32
+ gpu.printf "%d\0A", %c-1_i32 : i32
}
- gpu.printf "===----------------=== %d \0A" %c-1_i32 : i32
+ gpu.printf "===----------------=== %d \0A", %c-1_i32 : i32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
index 65e5fc0aff6aa3..fce16f3df23686 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x64_swizzle128b.mlir
@@ -109,7 +109,7 @@ module @mymod {
// Step 6. First thread does TMA load
scf.if %10 {
- gpu.printf "[GPU] TMA SIZE %d\0A" %c32768 : index
+ gpu.printf "[GPU] TMA SIZE %d\0A", %c32768 : index
nvgpu.tma.async.load %d_lhsTensorMap[%c0, %c0], %9[%c0] to %lhsShmem : !lhsTensorMap, !barrierType -> !shmemlhs
nvgpu.tma.async.load %d_rhsTensorMap[%c0, %c0], %9[%c0] to %rhsShmem1 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1]>, 3>
nvgpu.tma.async.load %d_rhsTensorMap[%c64, %c0], %9[%c0] to %rhsShmem2 : !rhsTensorMap, !barrierType -> memref<64x64xf16, strided<[128, 1], offset: 4096>, 3>
@@ -124,7 +124,7 @@ module @mymod {
// Step 8. Print loaded data in 128b swizzled
scf.if %10 {
- gpu.printf "===--- Matrix B ---=== %d \n" %c-1_i32 : i32
+ gpu.printf "===--- Matrix B ---=== %d \n", %c-1_i32 : i32
scf.for %ii = %c0 to %c64 step %c1 {
scf.for %j = %c0 to %c128 step %c1 {
%lhs0 = memref.load %rhsShmem[%ii, %j] : !shmemrhs
@@ -133,7 +133,7 @@ module @mymod {
}
gpu.printf "%d\n" %c-1_i32 : i32
}
- gpu.printf "===----------------=== %d \n" %c-1_i32 : i32
+ gpu.printf "===----------------=== %d \n", %c-1_i32 : i32
}
gpu.barrier
gpu.terminator
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
index 391fda82e1e197..acca9811f5702e 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/tma_load_64x8_8x128_noswizzle.mlir
@@ -80,8 +80,8 @@ module @mymod {
nvgpu.mbarrier.arrive.expect_tx %9[%c0], %c6144 : <memorySpace = #gpu.address_space<workgroup>>
%11 = memref.load %7[%c0, %c0] : memref<64x8xf32, 3>
%12 = memref.load %8[%c0, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA BEFORE lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA BEFORE rhs[7][0] %f\0A", %12 : f32
nvgpu.tma.async.load %3[%c0, %c0], %9[%c0] to %7 : <tensor = memref<64x8xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<64x8xf32, 3>
nvgpu.tma.async.load %4[%c0, %c0], %9[%c0] to %8 : <tensor = memref<8x128xf32, 3>, swizzle = none, l2promo = none, oob = zero, interleave = none>, <memorySpace = #gpu.address_space<workgroup>> -> memref<8x128xf32, 3>
} else {
@@ -92,8 +92,8 @@ module @mymod {
scf.if %10 {
%11 = memref.load %7[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %8[%c7, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
index f83f65bb2963ca..fe6c645357ecb3 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
+++ b/mlir/test/Integration/GPU/CUDA/sm90/transform-dialect/tma_load_64x8_8x128_noswizzle-transform.mlir
@@ -96,8 +96,8 @@ func.func @main() {
scf.if %10 {
%11 = memref.load %out[%c45, %c7] : memref<64x8xf32, 3>
%12 = memref.load %out_1[%c7, %c0] : memref<8x128xf32, 3>
- gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A" %11 : f32
- gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A" %12 : f32
+ gpu.printf "[GPU] TMA LOADED lhs[45][7] %f\0A", %11 : f32
+ gpu.printf "[GPU] TMA LOADED rhs[7][0] %f\0A", %12 : f32
}
gpu.terminator
}
diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir
index d5e6e3757540b2..4a0e4d34bfab5e 100644
--- a/mlir/test/Integration/GPU/ROCM/printf.mlir
+++ b/mlir/test/Integration/GPU/ROCM/printf.mlir
@@ -13,7 +13,7 @@ module attributes {gpu.container_module} {
gpu.module @kernels {
gpu.func @hello() kernel {
%0 = gpu.thread_id x
- gpu.printf "Hello from %d\n" %0 : index
+ gpu.printf "Hello from %d\n", %0 : index
gpu.return
}
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/121940
More information about the Mlir-commits
mailing list