[llvm] [AMDGPU][Verifier] Limit kill/wqm.demote intrinsics to PS shaders (PR #151922)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 01:52:25 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Carl Ritson (perlfu)
<details>
<summary>Changes</summary>
Only amdgpu_ps shaders should be calling llvm.amdgcn.kill and llvm.amdgcn.wqm.demote. Enforce this through the verifier.
---
Full diff: https://github.com/llvm/llvm-project/pull/151922.diff
6 Files Affected:
- (modified) llvm/lib/IR/Verifier.cpp (+7)
- (modified) llvm/test/CodeGen/AMDGPU/default-fp-mode.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll (+21-21)
- (modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+9-3)
- (modified) llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (+1-1)
- (added) llvm/test/Verifier/amdgpu-intrinsic.ll (+34)
``````````diff
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3ff9895e161c4..90c60b5d64841 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6714,6 +6714,13 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"invalid vector type for format", &Call, Src0, Call.getArgOperand(0));
Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB),
"invalid vector type for format", &Call, Src1, Call.getArgOperand(2));
+ }
+ case Intrinsic::amdgcn_kill:
+ case Intrinsic::amdgcn_wqm_demote: {
+ Check(Call.getCaller()->getCallingConv() == CallingConv::AMDGPU_PS,
+ "Intrinsic can only be used from functions with the amdgpu_ps"
+ " calling convention ",
+ &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index b63fff38f34f6..8fb7c5daf081e 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -145,10 +145,10 @@ define amdgpu_kernel void @test_flush_f64_outputs(ptr addrspace(1) %out0, ptr ad
ret void
}
-; GCN-LABEL: {{^}}kill_gs_const:
+; GCN-LABEL: {{^}}kill_ps_const:
; GCN: FloatMode: 240
; GCN: IeeeMode: 0
-define amdgpu_gs void @kill_gs_const() {
+define amdgpu_ps void @kill_ps_const() {
main_body:
%cmp0 = icmp ule i32 0, 3
call void @llvm.amdgcn.kill(i1 %cmp0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 462090c6e89df..6d85cdceec4c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -3,10 +3,10 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
-; GCN-LABEL: {{^}}gs_const:
+; GCN-LABEL: {{^}}ps_const:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @gs_const() {
+define amdgpu_ps void @ps_const() {
%tmp = icmp ule i32 0, 3
%tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
%c1 = fcmp oge float %tmp1, 0.0
@@ -37,7 +37,7 @@ define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) {
; GCN-LABEL: {{^}}true:
; GCN-NEXT: %bb.
; GCN-NEXT: s_endpgm
-define amdgpu_gs void @true() {
+define amdgpu_ps void @true() {
call void @llvm.amdgcn.kill(i1 true)
ret void
}
@@ -45,7 +45,7 @@ define amdgpu_gs void @true() {
; GCN-LABEL: {{^}}false:
; GCN-NOT: v_cmpx
; GCN: s_mov_b64 exec, 0
-define amdgpu_gs void @false() {
+define amdgpu_ps void @false() {
call void @llvm.amdgcn.kill(i1 false)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
ret void
@@ -58,7 +58,7 @@ define amdgpu_gs void @false() {
; GCN: s_and{{n2|_not1}}_b64 s[0:1], exec, s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = or i1 %c1, %c2
@@ -73,7 +73,7 @@ define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN: s_xor_b64 s[0:1]
; GCN: s_and{{n2|_not1}}_b64 s[2:3], s[2:3], s[0:1]
; GCN: s_and_b64 exec, exec, s[2:3]
-define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
+define amdgpu_ps void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
%x = xor i1 %c1, %c2
@@ -85,7 +85,7 @@ define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) {
; GCN-LABEL: {{^}}oeq:
; GCN: v_cmp_neq_f32
-define amdgpu_gs void @oeq(float %a) {
+define amdgpu_ps void @oeq(float %a) {
%c1 = fcmp oeq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -94,7 +94,7 @@ define amdgpu_gs void @oeq(float %a) {
; GCN-LABEL: {{^}}ogt:
; GCN: v_cmp_nlt_f32
-define amdgpu_gs void @ogt(float %a) {
+define amdgpu_ps void @ogt(float %a) {
%c1 = fcmp ogt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -103,7 +103,7 @@ define amdgpu_gs void @ogt(float %a) {
; GCN-LABEL: {{^}}oge:
; GCN: v_cmp_nle_f32
-define amdgpu_gs void @oge(float %a) {
+define amdgpu_ps void @oge(float %a) {
%c1 = fcmp oge float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -112,7 +112,7 @@ define amdgpu_gs void @oge(float %a) {
; GCN-LABEL: {{^}}olt:
; GCN: v_cmp_ngt_f32
-define amdgpu_gs void @olt(float %a) {
+define amdgpu_ps void @olt(float %a) {
%c1 = fcmp olt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -121,7 +121,7 @@ define amdgpu_gs void @olt(float %a) {
; GCN-LABEL: {{^}}ole:
; GCN: v_cmp_nge_f32
-define amdgpu_gs void @ole(float %a) {
+define amdgpu_ps void @ole(float %a) {
%c1 = fcmp ole float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -130,7 +130,7 @@ define amdgpu_gs void @ole(float %a) {
; GCN-LABEL: {{^}}one:
; GCN: v_cmp_nlg_f32
-define amdgpu_gs void @one(float %a) {
+define amdgpu_ps void @one(float %a) {
%c1 = fcmp one float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -139,7 +139,7 @@ define amdgpu_gs void @one(float %a) {
; GCN-LABEL: {{^}}ord:
; GCN: v_cmp_o_f32
-define amdgpu_gs void @ord(float %a) {
+define amdgpu_ps void @ord(float %a) {
%c1 = fcmp ord float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -148,7 +148,7 @@ define amdgpu_gs void @ord(float %a) {
; GCN-LABEL: {{^}}uno:
; GCN: v_cmp_u_f32
-define amdgpu_gs void @uno(float %a) {
+define amdgpu_ps void @uno(float %a) {
%c1 = fcmp uno float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -157,7 +157,7 @@ define amdgpu_gs void @uno(float %a) {
; GCN-LABEL: {{^}}ueq:
; GCN: v_cmp_lg_f32
-define amdgpu_gs void @ueq(float %a) {
+define amdgpu_ps void @ueq(float %a) {
%c1 = fcmp ueq float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -166,7 +166,7 @@ define amdgpu_gs void @ueq(float %a) {
; GCN-LABEL: {{^}}ugt:
; GCN: v_cmp_ge_f32
-define amdgpu_gs void @ugt(float %a) {
+define amdgpu_ps void @ugt(float %a) {
%c1 = fcmp ugt float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -175,7 +175,7 @@ define amdgpu_gs void @ugt(float %a) {
; GCN-LABEL: {{^}}uge:
; GCN: v_cmp_gt_f32_e32 vcc, -1.0
-define amdgpu_gs void @uge(float %a) {
+define amdgpu_ps void @uge(float %a) {
%c1 = fcmp uge float %a, -1.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -184,7 +184,7 @@ define amdgpu_gs void @uge(float %a) {
; GCN-LABEL: {{^}}ult:
; GCN: v_cmp_le_f32_e32 vcc, -2.0
-define amdgpu_gs void @ult(float %a) {
+define amdgpu_ps void @ult(float %a) {
%c1 = fcmp ult float %a, -2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -193,7 +193,7 @@ define amdgpu_gs void @ult(float %a) {
; GCN-LABEL: {{^}}ule:
; GCN: v_cmp_lt_f32_e32 vcc, 2.0
-define amdgpu_gs void @ule(float %a) {
+define amdgpu_ps void @ule(float %a) {
%c1 = fcmp ule float %a, 2.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -202,7 +202,7 @@ define amdgpu_gs void @ule(float %a) {
; GCN-LABEL: {{^}}une:
; GCN: v_cmp_eq_f32_e32 vcc, 0
-define amdgpu_gs void @une(float %a) {
+define amdgpu_ps void @une(float %a) {
%c1 = fcmp une float %a, 0.0
call void @llvm.amdgcn.kill(i1 %c1)
call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
@@ -211,7 +211,7 @@ define amdgpu_gs void @une(float %a) {
; GCN-LABEL: {{^}}neg_olt:
; GCN: v_cmp_gt_f32_e32 vcc, 1.0
-define amdgpu_gs void @neg_olt(float %a) {
+define amdgpu_ps void @neg_olt(float %a) {
%c1 = fcmp olt float %a, 1.0
%c2 = xor i1 %c1, 1
call void @llvm.amdgcn.kill(i1 %c2)
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 097154ed23ede..50f1a3ae44f63 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1760,7 +1760,7 @@ define amdgpu_ps void @test_kill_i1_terminator_float() #0 {
ret void
}
-define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+define amdgpu_ps void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
; GFX1032-LABEL: test_kill_i1_terminator_i1:
; GFX1032: ; %bb.0:
; GFX1032-NEXT: v_cmp_lt_i32_e32 vcc_lo, v0, v1
@@ -1769,12 +1769,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1032-NEXT: s_or_b32 s0, vcc_lo, s0
; GFX1032-NEXT: s_andn2_b32 s0, exec_lo, s0
; GFX1032-NEXT: s_andn2_b32 s1, s1, s0
+; GFX1032-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1032-NEXT: ; %bb.1:
; GFX1032-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX1032-NEXT: v_mov_b32_e32 v0, 0
; GFX1032-NEXT: exp mrt0 off, off, off, off
; GFX1032-NEXT: s_endpgm
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: .LBB32_2:
; GFX1032-NEXT: s_mov_b32 exec_lo, 0
+; GFX1032-NEXT: exp null off, off, off, off done vm
; GFX1032-NEXT: s_endpgm
;
; GFX1064-LABEL: test_kill_i1_terminator_i1:
@@ -1785,12 +1788,15 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d
; GFX1064-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[0:1], exec, s[0:1]
; GFX1064-NEXT: s_andn2_b64 s[2:3], s[2:3], s[0:1]
+; GFX1064-NEXT: s_cbranch_scc0 .LBB32_2
+; GFX1064-NEXT: ; %bb.1:
; GFX1064-NEXT: s_and_b64 exec, exec, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v0, 0
; GFX1064-NEXT: exp mrt0 off, off, off, off
; GFX1064-NEXT: s_endpgm
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: .LBB32_2:
; GFX1064-NEXT: s_mov_b64 exec, 0
+; GFX1064-NEXT: exp null off, off, off, off done vm
; GFX1064-NEXT: s_endpgm
%c1 = icmp slt i32 %a, %b
%c2 = icmp slt i32 %c, %d
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 077da9cda6523..95fc24573c058 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2727,7 +2727,7 @@ main_body:
declare void @llvm.amdgcn.kill(i1)
-define void @kill_true() {
+define amdgpu_ps void @kill_true() {
; CHECK-LABEL: @kill_true(
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Verifier/amdgpu-intrinsic.ll b/llvm/test/Verifier/amdgpu-intrinsic.ll
new file mode 100644
index 0000000000000..7745e0198b36d
--- /dev/null
+++ b/llvm/test/Verifier/amdgpu-intrinsic.ll
@@ -0,0 +1,34 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+target datalayout = "A5"
+
+declare void @llvm.amdgcn.kill(i1)
+declare void @llvm.amdgcn.wqm.demote(i1)
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_cs void @cs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.kill(i1 true)
+define amdgpu_gs void @gs_kill() {
+ call void @llvm.amdgcn.kill(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_cs void @cs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
+
+; CHECK: Intrinsic can only be used from functions with the amdgpu_ps calling convention
+; CHECK-NEXT: call void @llvm.amdgcn.wqm.demote(i1 true)
+define amdgpu_gs void @gs_wqm_demote() {
+ call void @llvm.amdgcn.wqm.demote(i1 true)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/151922
More information about the llvm-commits mailing list