[Mlir-commits] [mlir] [MLIR][NVVM] Add clusterlaunchcontrol Ops (PR #156585)
Srinivasa Ravi
llvmlistbot at llvm.org
Tue Sep 2 22:06:37 PDT 2025
https://github.com/Wolfram70 created https://github.com/llvm/llvm-project/pull/156585
This change adds the `clusterlaunchcontrol.try.cancel` and `clusterlaunchcontrol.query.cancel` Ops to the NVVM dialect.
Tests are added in `clusterlaunchcontrol.mlir`.
PTX Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-try-cancel
From d2a57c3b43d1accf3a8595cc5bd95e4562874ea0 Mon Sep 17 00:00:00 2001
From: Srinivasa Ravi <srinivasar at nvidia.com>
Date: Mon, 1 Sep 2025 19:03:00 +0530
Subject: [PATCH] [MLIR][NVVM] Add clusterlaunchcontrol Ops
This change adds the `clusterlaunchcontrol.try.cancel` and
`clusterlaunchcontrol.query.cancel` Ops to the NVVM dialect.
Tests are added in `clusterlaunchcontrol.mlir`.
PTX Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-try-cancel
---
mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 113 ++++++++++++++++++
mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp | 64 ++++++++++
.../LLVMIR/nvvm/clusterlaunchcontrol.mlir | 64 ++++++++++
mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 16 +++
4 files changed, 257 insertions(+)
create mode 100644 mlir/test/Target/LLVMIR/nvvm/clusterlaunchcontrol.mlir
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 8537c7030aa8f..511aa495ad757 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -4035,6 +4035,119 @@ def NVVM_DotAccumulate2WayOp : NVVM_Op<"dot.accumulate.2way"> {
}];
}
+//===----------------------------------------------------------------------===//
+// NVVM clusterlaunchcontrol Ops.
+//===----------------------------------------------------------------------===//
+
+def NVVM_ClusterLaunchControlTryCancelOp
+ : NVVM_Op<"clusterlaunchcontrol.try.cancel", [NVVMRequiresSM<100>]> {
+ let summary = "Request atomically canceling the launch of a cluster that has not started running yet";
+ let description = [{
+ `clusterlaunchcontrol.try.cancel` requests atomically canceling the launch
+ of a cluster that has not started running yet. It asynchronously writes an
+ opaque response to shared memory indicating whether the operation succeeded
+ or failed.
+
+ Operand `addr` specifies the naturally aligned address of the 16-byte wide
+ shared memory location where the request's response is written.
+
+ Operand `mbar` specifies the mbarrier object used to track the completion
+ of the asynchronous operation.
+
+ If `multicast` is specified, the response is asynchronously written to the
+ corresponding local shared memory location (specified by `addr`) of each CTA
+ in the requesting cluster.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-try-cancel)
+ }];
+
+ let arguments = (ins UnitAttr:$multicast,
+ LLVM_PointerShared: $addr,
+ LLVM_PointerShared: $mbar);
+
+ let assemblyFormat = "(`multicast` $multicast^ `,`)? $addr `,` $mbar attr-dict";
+
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder);
+ }];
+
+ string llvmBuilder = [{
+ auto [id, args] =
+ NVVM::ClusterLaunchControlTryCancelOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ createIntrinsicCall(builder, id, args);
+ }];
+}
+
+def ClusterLaunchControlIsCanceled
+ : I32EnumCase<"IS_CANCELED", 0, "is_canceled">;
+def ClusterLaunchControlGetFirstCTAIDX
+ : I32EnumCase<"GET_FIRST_CTA_ID_X", 1, "get_first_cta_id_x">;
+def ClusterLaunchControlGetFirstCTAIDY
+ : I32EnumCase<"GET_FIRST_CTA_ID_Y", 2, "get_first_cta_id_y">;
+def ClusterLaunchControlGetFirstCTAIDZ
+ : I32EnumCase<"GET_FIRST_CTA_ID_Z", 3, "get_first_cta_id_z">;
+
+def ClusterLaunchControlQueryType
+ : I32Enum<"ClusterLaunchControlQueryType",
+ "NVVM ClusterLaunchControlQueryType",
+ [ClusterLaunchControlIsCanceled, ClusterLaunchControlGetFirstCTAIDX,
+ ClusterLaunchControlGetFirstCTAIDY, ClusterLaunchControlGetFirstCTAIDZ]> {
+ let cppNamespace = "::mlir::NVVM";
+}
+
+def ClusterLaunchControlQueryTypeAttr
+ : EnumAttr<NVVM_Dialect,
+ ClusterLaunchControlQueryType, "cluster_launch_control_query_type"> {
+ let assemblyFormat = "$value";
+}
+
+def NVVM_ClusterLaunchControlQueryCancelOp
+ : NVVM_Op<"clusterlaunchcontrol.query.cancel", [NVVMRequiresSM<100>]> {
+ let summary = "Query the response of a clusterlaunchcontrol.try.cancel operation";
+ let description = [{
+ `clusterlaunchcontrol.query.cancel` queries the response of a
+ `clusterlaunchcontrol.try.cancel` operation.
+
+ Operand `try_cancel_response` specifies the response of the
+ `clusterlaunchcontrol.try.cancel` operation to be queried.
+
+ Operand `query_type` specifies the type of query to perform and can be one
+ of the following:
+ - `is_canceled` : Returns true if the try cancel request succeeded,
+ otherwise returns false.
+ - `get_first_cta_id_{x/y/z}` : Behaviour is defined only if the try cancel
+ request succeeded. Returns the x, y, or z coordinate of the first CTA in
+ the canceled cluster.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-query-cancel)
+ }];
+
+ let arguments = (ins DefaultValuedAttr<ClusterLaunchControlQueryTypeAttr,
+ "ClusterLaunchControlQueryType::IS_CANCELED">:$query_type,
+ I128:$try_cancel_response);
+ let results = (outs AnyTypeOf<[I1, I32]>:$res);
+
+ let assemblyFormat = "(`query` `=` $query_type^ `,`)? $try_cancel_response attr-dict `:` type($res)";
+
+ let hasVerifier = 1;
+
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder);
+ }];
+
+ string llvmBuilder = [{
+ auto [id, args] =
+ NVVM::ClusterLaunchControlQueryCancelOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
+ }];
+}
+
//===----------------------------------------------------------------------===//
// NVVM target attribute.
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 77ec1ebde3109..ea070ff1c0822 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1402,6 +1402,24 @@ LogicalResult NVVM::PrefetchOp::verify() {
return success();
}
+LogicalResult NVVM::ClusterLaunchControlQueryCancelOp::verify() {
+ switch (getQueryType()) {
+ case NVVM::ClusterLaunchControlQueryType::IS_CANCELED:
+ if (!getType().isInteger(1))
+ return emitOpError("is_canceled query type returns an i1");
+ break;
+ case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_X:
+ case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_Y:
+ case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_Z:
+ if (!getType().isInteger(32)) {
+ return emitOpError("get_first_cta_id_x, get_first_cta_id_y, "
+ "get_first_cta_id_z query types return an i32");
+ }
+ break;
+ }
+ return success();
+}
+
/// Packs the given `field` into the `result`.
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
static llvm::Value *
@@ -2088,6 +2106,52 @@ bool NVVM::InlinePtxOp::getAsmValues(
return false; // No manual mapping needed
}
+NVVM::IDArgPair ClusterLaunchControlTryCancelOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto curOp = cast<NVVM::ClusterLaunchControlTryCancelOp>(op);
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(curOp.getAddr()));
+ args.push_back(mt.lookupValue(curOp.getMbar()));
+
+ return curOp.getMulticast()
+ ? NVVM::IDArgPair(
+ {llvm::Intrinsic::
+ nvvm_clusterlaunchcontrol_try_cancel_async_multicast_shared,
+ args})
+ : NVVM::IDArgPair(
+ {llvm::Intrinsic::
+ nvvm_clusterlaunchcontrol_try_cancel_async_shared,
+ args});
+}
+
+NVVM::IDArgPair ClusterLaunchControlQueryCancelOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto curOp = cast<NVVM::ClusterLaunchControlQueryCancelOp>(op);
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(curOp.getTryCancelResponse()));
+
+ switch (curOp.getQueryType()) {
+ case NVVM::ClusterLaunchControlQueryType::IS_CANCELED:
+ return {
+ llvm::Intrinsic::nvvm_clusterlaunchcontrol_query_cancel_is_canceled,
+ args};
+ case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_X:
+ return {llvm::Intrinsic::
+ nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_x,
+ args};
+ case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_Y:
+ return {llvm::Intrinsic::
+ nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_y,
+ args};
+ case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_Z:
+ return {llvm::Intrinsic::
+ nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_z,
+ args};
+ default:
+ llvm_unreachable("Invalid query type");
+ }
+}
+
//===----------------------------------------------------------------------===//
// NVVMDialect initialization, type parsing, and registration.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Target/LLVMIR/nvvm/clusterlaunchcontrol.mlir b/mlir/test/Target/LLVMIR/nvvm/clusterlaunchcontrol.mlir
new file mode 100644
index 0000000000000..3100231e0de2f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/clusterlaunchcontrol.mlir
@@ -0,0 +1,64 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @clusterlaunchcontrol_try_cancel(%addr: !llvm.ptr<3>, %mbar: !llvm.ptr<3>) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_try_cancel(ptr addrspace(3) %0, ptr addrspace(3) %1) {
+ // CHECK-NEXT: call void @llvm.nvvm.clusterlaunchcontrol.try_cancel.async.shared(ptr addrspace(3) %0, ptr addrspace(3) %1)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ nvvm.clusterlaunchcontrol.try.cancel %addr, %mbar
+ llvm.return
+}
+
+llvm.func @clusterlaunchcontrol_try_cancel_multicast(%addr: !llvm.ptr<3>, %mbar: !llvm.ptr<3>) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_try_cancel_multicast(ptr addrspace(3) %0, ptr addrspace(3) %1) {
+ // CHECK-NEXT: call void @llvm.nvvm.clusterlaunchcontrol.try_cancel.async.multicast.shared(ptr addrspace(3) %0, ptr addrspace(3) %1)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ nvvm.clusterlaunchcontrol.try.cancel multicast, %addr, %mbar
+ llvm.return
+}
+
+llvm.func @clusterlaunchcontrol_query_cancel(%try_cancel_response: i128) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel(i128 %0) {
+ // CHECK-NEXT: %2 = call i1 @llvm.nvvm.clusterlaunchcontrol.query_cancel.is_canceled(i128 %0)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ nvvm.clusterlaunchcontrol.query.cancel %try_cancel_response : i1
+ llvm.return
+}
+
+llvm.func @clusterlaunchcontrol_query_cancel_is_canceled(%try_cancel_response: i128) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_is_canceled(i128 %0) {
+ // CHECK-NEXT: %2 = call i1 @llvm.nvvm.clusterlaunchcontrol.query_cancel.is_canceled(i128 %0)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ %res = nvvm.clusterlaunchcontrol.query.cancel query = is_canceled, %try_cancel_response : i1
+ llvm.return
+}
+
+llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_x(%try_cancel_response: i128) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_get_first_cta_id_x(i128 %0) {
+ // CHECK-NEXT: %2 = call i32 @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid.x(i128 %0)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i32
+ llvm.return
+}
+
+llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_y(%try_cancel_response: i128) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_get_first_cta_id_y(i128 %0) {
+ // CHECK-NEXT: %2 = call i32 @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid.y(i128 %0)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_y, %try_cancel_response : i32
+ llvm.return
+}
+
+llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_z(%try_cancel_response: i128) {
+ // CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_get_first_cta_id_z(i128 %0) {
+ // CHECK-NEXT: %2 = call i32 @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid.z(i128 %0)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_z, %try_cancel_response : i32
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index b35a6dbcca286..383f4829f3287 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -535,3 +535,19 @@ llvm.func @nanosleep() {
nvvm.nanosleep 100000000000000
llvm.return
}
+
+// -----
+
+llvm.func @clusterlaunchcontrol_query_cancel_is_canceled_invalid_return_type(%try_cancel_response: i128) {
+ // expected-error @+1 {{'nvvm.clusterlaunchcontrol.query.cancel' op is_canceled query type returns an i1}}
+ %res = nvvm.clusterlaunchcontrol.query.cancel query = is_canceled, %try_cancel_response : i32
+ llvm.return
+}
+
+// -----
+
+llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_type(%try_cancel_response: i128) {
+ // expected-error @+1 {{'nvvm.clusterlaunchcontrol.query.cancel' op get_first_cta_id_x, get_first_cta_id_y, get_first_cta_id_z query types return an i32}}
+ %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
+ llvm.return
+}
More information about the Mlir-commits
mailing list