[llvm] [LLVM][NVPTX] Add NVPTX codegen support for clusterlaunchcontrol instruction (PR #134568)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 7 05:25:58 PDT 2025
================
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx86 | FileCheck %s --check-prefixes=CHECK,CHECK-PTX-SHARED64
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_100 -mattr=+ptx86 --nvptx-short-ptr | FileCheck --check-prefixes=CHECK,CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 | %ptxas-verify -arch=sm_100 %}
+; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 --nvptx-short-ptr | %ptxas-verify -arch=sm_100 %}
+
+define void @nvvm_clusterlaunchcontrol_try_cancel(ptr %addr, ptr %mbar,
+; CHECK-PTX-SHARED64-LABEL: nvvm_clusterlaunchcontrol_try_cancel(
+; CHECK-PTX-SHARED64: {
+; CHECK-PTX-SHARED64-NEXT: .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED64-EMPTY:
+; CHECK-PTX-SHARED64-NEXT: // %bb.0:
+; CHECK-PTX-SHARED64-NEXT: ld.param.u64 %rd1, [nvvm_clusterlaunchcontrol_try_cancel_param_0];
+; CHECK-PTX-SHARED64-NEXT: ld.param.u64 %rd2, [nvvm_clusterlaunchcontrol_try_cancel_param_1];
+; CHECK-PTX-SHARED64-NEXT: clusterlaunchcontrol.try_cancel.async.mbarrier::complete_tx::bytes.b128 [%rd1], [%rd2];
+; CHECK-PTX-SHARED64-NEXT: ld.param.u64 %rd3, [nvvm_clusterlaunchcontrol_try_cancel_param_2];
+; CHECK-PTX-SHARED64-NEXT: ld.param.u64 %rd4, [nvvm_clusterlaunchcontrol_try_cancel_param_3];
+; CHECK-PTX-SHARED64-NEXT: clusterlaunchcontrol.try_cancel.async.shared::cta.mbarrier::complete_tx::bytes.b128 [%rd3], [%rd4];
+; CHECK-PTX-SHARED64-NEXT: ret;
+;
+; CHECK-PTX-SHARED32-LABEL: nvvm_clusterlaunchcontrol_try_cancel(
+; CHECK-PTX-SHARED32: {
+; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT: // %bb.0:
+; CHECK-PTX-SHARED32-NEXT: ld.param.u64 %rd1, [nvvm_clusterlaunchcontrol_try_cancel_param_0];
+; CHECK-PTX-SHARED32-NEXT: ld.param.u64 %rd2, [nvvm_clusterlaunchcontrol_try_cancel_param_1];
+; CHECK-PTX-SHARED32-NEXT: clusterlaunchcontrol.try_cancel.async.mbarrier::complete_tx::bytes.b128 [%rd1], [%rd2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.u32 %r1, [nvvm_clusterlaunchcontrol_try_cancel_param_2];
+; CHECK-PTX-SHARED32-NEXT: ld.param.u32 %r2, [nvvm_clusterlaunchcontrol_try_cancel_param_3];
+; CHECK-PTX-SHARED32-NEXT: clusterlaunchcontrol.try_cancel.async.shared::cta.mbarrier::complete_tx::bytes.b128 [%r1], [%r2];
+; CHECK-PTX-SHARED32-NEXT: ret;
+ ptr addrspace(3) %saddr, ptr addrspace(3) %smbar,
+ i128 %try_cancel_response) {
+
+ tail call void @llvm.nvvm.clusterlaunchcontrol.try_cancel.async(ptr %addr, ptr %mbar)
+
+ tail call void @llvm.nvvm.clusterlaunchcontrol.try_cancel.async.shared(ptr addrspace(3) %saddr, ptr addrspace(3) %smbar)
+ ret void;
+}
+
+define i32 @nvvm_clusterlaunchcontrol_query_cancel_is_canceled(i128 %try_cancel_response) local_unnamed_addr #0 {
+; CHECK-LABEL: nvvm_clusterlaunchcontrol_query_cancel_is_canceled(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [nvvm_clusterlaunchcontrol_query_cancel_is_canceled_param_0];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 %handle;
+; CHECK-NEXT: mov.b128 %handle, {%rd1, %rd2};
+; CHECK-NEXT: clusterlaunchcontrol.query_cancel.is_canceled.pred.b128 %p1, %handle;
+; CHECK-NEXT: }
+; CHECK-NEXT: selp.u32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %v0 = call i1 @llvm.nvvm.clusterlaunchcontrol.query_cancel.is_canceled(i128 %try_cancel_response)
+ %v2 = zext i1 %v0 to i32
+ ret i32 %v2;
+}
+
+
+define <4 x i32> @nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid(i128 %try_cancel_response) local_unnamed_addr #0 {
+; CHECK-LABEL: nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_param_0];
+; CHECK-NEXT: {
+; CHECK-NEXT: .reg .b128 %handle;
+; CHECK-NEXT: mov.b128 %handle, {%rd1, %rd2};
+; CHECK-NEXT: clusterlaunchcontrol.query_cancel.get_first_ctaid.v4.b32.b128 {%r1, %r2, %r3, %r4}, %handle;
+; CHECK-NEXT: }
+; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r2, %r3, %r4};
+; CHECK-NEXT: ret;
+ %v0 = call <4 x i32> @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid(i128 %try_cancel_response)
----------------
gonzalobg wrote:
I think the last element of the returned vector should be `undef`, which would allow `st.param.v4.b32 ..., {%r1, %r2, %r3, _}` to be generated (or allow one of the other registers to be reused; it doesn't really matter which one).
https://github.com/llvm/llvm-project/pull/134568
More information about the llvm-commits mailing list