[Mlir-commits] [mlir] [mlir][python] Add `cluster_size` to `gpu.launch_func` python binding (PR #177811)

Sat Jan 24 17:13:04 PST 2026

https://github.com/Hardcode84 created https://github.com/llvm/llvm-project/pull/177811

None

>From cf8e1b90ed7de1b181376b3c5c85b19e4f1deb4e Mon Sep 17 00:00:00 2001
From: Ivan Butygin <ivan.butygin at gmail.com>
Date: Sun, 25 Jan 2026 02:02:38 +0100
Subject: [PATCH] [mlir][python] Add cluster_size to launch_func python binding

---
 mlir/python/mlir/dialects/gpu/__init__.py | 8 ++++++++
 mlir/test/python/dialects/gpu/dialect.py  | 7 ++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py
index d15643ca700e4..2259951293ef6 100644
--- a/mlir/python/mlir/dialects/gpu/__init__.py
+++ b/mlir/python/mlir/dialects/gpu/__init__.py
@@ -186,6 +186,7 @@ def __init__(
         async_dependencies: Optional[List[Value]] = None,
         dynamic_shared_memory_size: Optional[Value] = None,
         async_object=None,
+        cluster_size: Optional[Tuple[Any, Any, Any]] = None,
         *,
         loc=None,
         ip=None,
@@ -202,6 +203,8 @@ def __init__(
         block_size_x, block_size_y, block_size_z = map(
             _convert_literal_to_constant, block_size
         )
+        cluster_size_x, cluster_size_y, cluster_size_z = map(
+            _convert_literal_to_constant, cluster_size) if cluster_size else (None, None, None)
 
         super().__init__(
             async_token,
@@ -214,6 +217,9 @@ def __init__(
             block_size_y,
             block_size_z,
             kernel_operands,
+            clusterSizeX=cluster_size_x,
+            clusterSizeY=cluster_size_y,
+            clusterSizeZ=cluster_size_z,
             dynamicSharedMemorySize=dynamic_shared_memory_size,
             asyncObject=async_object,
             loc=loc,
@@ -229,6 +235,7 @@ def launch_func(
     async_dependencies: Optional[List[Value]] = None,
     dynamic_shared_memory_size: Optional[Value] = None,
     async_object=None,
+    cluster_size: Optional[Tuple[Any, Any, Any]] = None,
     *,
     loc=None,
     ip=None,
@@ -241,6 +248,7 @@ def launch_func(
         async_dependencies=async_dependencies,
         dynamic_shared_memory_size=dynamic_shared_memory_size,
         async_object=async_object,
+        cluster_size=cluster_size,
         loc=loc,
         ip=ip,
     )
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 1a009b7dfa30d..331993ee18821 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -187,6 +187,7 @@ def testGPULaunchFuncOp():
         c1 = arith.constant(T.index(), 1)
         grid_sizes = (1, 1, 1)
         block_sizes = (1, 1, 1)
+        cluster_sizes = (1, 1, 1)
         token = gpu.wait()
         token = gpu.launch_func(
             async_dependencies=[token],
@@ -194,6 +195,7 @@ def testGPULaunchFuncOp():
             grid_size=grid_sizes,
             block_size=block_sizes,
             kernel_operands=[],
+            cluster_size=cluster_sizes,
         )
         gpu.wait(async_dependencies=[token])
         func.ReturnOp([])
@@ -215,7 +217,10 @@ def testGPULaunchFuncOp():
     # CHECK:           %[[CONSTANT_4:.*]] = arith.constant 1 : index
     # CHECK:           %[[CONSTANT_5:.*]] = arith.constant 1 : index
     # CHECK:           %[[CONSTANT_6:.*]] = arith.constant 1 : index
-    # CHECK:           %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
+    # CHECK:           %[[CONSTANT_7:.*]] = arith.constant 1 : index
+    # CHECK:           %[[CONSTANT_8:.*]] = arith.constant 1 : index
+    # CHECK:           %[[CONSTANT_9:.*]] = arith.constant 1 : index
+    # CHECK:           %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel clusters in (%[[CONSTANT_7]], %[[CONSTANT_8]], %[[CONSTANT_9]]) blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
     # CHECK:           %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
     # CHECK:           return
     # CHECK:         }