[Mlir-commits] [mlir] [MLIR][GPU] Add support for non-portable cluster size attribute (PR #95545)
Guray Ozen
llvmlistbot at llvm.org
Fri Jun 14 06:49:29 PDT 2024
================
@@ -0,0 +1,124 @@
+// RUN: mlir-opt %s \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_90a cubin-features=+ptx80 opt-level=3" \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%mlir_cuda_runtime \
+// RUN: --shared-libs=%mlir_runner_utils \
+// RUN: --shared-libs=%mlir_c_runner_utils \
+// RUN: --entry-point-result=void \
+// RUN: | FileCheck %s
+
+// CHECK: clusterIdx: (3, 3, 0) in Cluster Dimension: (4, 4, 1) blockIdx: (15, 15, 0)
+// CHECK: clusterIdx: (3, 3, 0) in Cluster Dimension: (4, 4, 1) blockIdx: (15, 15, 0)
+
+module attributes {gpu.container_module} {
+gpu.module @gpumodule {
+ gpu.func @kernel_cluster() kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 1>} {
+ %cidX = gpu.cluster_id x
+ %cidY = gpu.cluster_id y
+ %cidZ = gpu.cluster_id z
+ %cdimX = gpu.cluster_dim_blocks x
+ %cdimY = gpu.cluster_dim_blocks y
+ %cdimZ = gpu.cluster_dim_blocks z
+ %bidX = gpu.block_id x
+ %bidY = gpu.block_id y
+ %bidZ = gpu.block_id z
+ %cidX_i32 = index.casts %cidX : index to i32
+ %cidY_i32 = index.casts %cidY : index to i32
+ %cidZ_i32 = index.casts %cidZ : index to i32
+ %cdimX_i32 = index.casts %cdimX : index to i32
+ %cdimY_i32 = index.casts %cdimY : index to i32
+ %cdimZ_i32 = index.casts %cdimZ : index to i32
+ %bidX_i32 = index.casts %bidX : index to i32
+ %bidY_i32 = index.casts %bidY : index to i32
+ %bidZ_i32 = index.casts %bidZ : index to i32
+
+ %c_1 = arith.constant -1 : index
+ %cBlocksX = gpu.grid_dim x
+ %cN_1 = arith.addi %cBlocksX, %c_1 : index
+ %cnd1 = arith.cmpi eq, %bidX, %cN_1 : index
+ %cnd2 = arith.cmpi eq, %bidY, %cN_1 : index
+ scf.if %cnd1 {
+ scf.if %cnd2 {
+ gpu.printf "clusterIdx: (%d, %d, %d) in Cluster Dimension: (%d, %d, %d) blockIdx: (%d, %d, %d) \n"
+ %cidX_i32,
+ %cidY_i32,
+ %cidZ_i32,
+ %cdimX_i32,
+ %cdimY_i32,
+ %cdimZ_i32,
+ %bidX_i32,
+ %bidY_i32,
+ %bidZ_i32
+ :
+ i32, i32, i32, i32, i32, i32, i32, i32, i32
+ }
+ }
+ gpu.return
+ }
+}
+
+func.func @main() {
+ %cDimX = arith.constant 4 : index
+ %cDimY = arith.constant 4 : index
+ %cDimZ = arith.constant 1 : index
+ %gDimX = arith.constant 16 : index
+ %gDimY = arith.constant 16 : index
+ %gDimZ = arith.constant 1 : index
+ %bDimX = arith.constant 1 : index
+ %bDimY = arith.constant 1 : index
+ %bDimZ = arith.constant 1 : index
+
+ gpu.launch clusters(%cx, %cy, %cz) in (%cluster_x = %cDimX, %cluster_y = %cDimY,
----------------
grypp wrote:
Can we actually test with a cluster size larger than 8?
https://github.com/llvm/llvm-project/pull/95545
More information about the Mlir-commits
mailing list