[llvm] [AMDGPU] promote i1 arg type for amdgpu_cs (PR #82971)
Nick Anderson via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 27 22:51:40 PST 2024
https://github.com/nickleus27 updated https://github.com/llvm/llvm-project/pull/82971
From 6b4caa808702e59b65af4da9a3e7e3a455bf1495 Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Mon, 26 Feb 2024 01:08:58 -0800
Subject: [PATCH 1/2] [AMDGPU] promote i1 arg type for admgpu_cs
---
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 2 ++
llvm/test/CodeGen/AMDGPU/calling-conventions.ll | 9 +++++++++
2 files changed, 11 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index c5207228dc913f..be6bef0bd1c9e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -66,6 +66,8 @@ def RetCC_SI_Gfx : CallingConv<[
def CC_SI_SHADER : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i16>>,
+
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index ce1ce649c227d2..139116604d9c79 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -2078,4 +2078,13 @@ entry:
ret void
}
+; test that i1 argument for amdgpu_cs is working
+define amdgpu_cs void @amdgpu_cs_i1_arg(i1 %arg0) {
+; GCN-LABEL: amdgpu_cs_i1_arg:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_endpgm
+bb:
+ ret void
+}
+
attributes #0 = { nounwind noinline }
From 6c7aee0ba51a45dc387d7da6723857bba0c5587c Mon Sep 17 00:00:00 2001
From: Nick Anderson <nickleus27 at gmail.com>
Date: Tue, 27 Feb 2024 22:51:20 -0800
Subject: [PATCH 2/2] fixup! [AMDGPU] promote i1 arg type for admgpu_cs
---
.../CodeGen/AMDGPU/calling-conventions.ll | 219 +++++++++++++++++-
1 file changed, 213 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 139116604d9c79..3e636bc2e143a6 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -2078,12 +2078,219 @@ entry:
ret void
}
-; test that i1 argument for amdgpu_cs is working
-define amdgpu_cs void @amdgpu_cs_i1_arg(i1 %arg0) {
-; GCN-LABEL: amdgpu_cs_i1_arg:
-; GCN: ; %bb.0: ; %bb
-; GCN-NEXT: s_endpgm
-bb:
+define amdgpu_cs void @amdgpu_cs_i1(i1 %arg0) {
+; SI-LABEL: amdgpu_cs_i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_byte v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add i1 %arg0, %arg0
+ store i1 %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_v8i1(<8 x i1> %arg0) {
+; SI-LABEL: amdgpu_cs_v8i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_v8i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_byte v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_v8i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add <8 x i1> %arg0, %arg0
+ store <8 x i1> %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_v16i1(<16 x i1> %arg0) {
+; SI-LABEL: amdgpu_cs_v16i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_v16i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_short v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_v16i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add <16 x i1> %arg0, %arg0
+ store <16 x i1> %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_v32i1(<32 x i1> %arg0) {
+; SI-LABEL: amdgpu_cs_v32i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_v32i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_dword v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_v32i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add <32 x i1> %arg0, %arg0
+ store <32 x i1> %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_inreg_i1(i1 inreg %arg0) {
+; SI-LABEL: amdgpu_cs_inreg_i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_inreg_i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_byte v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_inreg_i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add i1 %arg0, %arg0
+ store i1 %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_inreg_v8i1(<8 x i1> inreg %arg0) {
+; SI-LABEL: amdgpu_cs_inreg_v8i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_inreg_v8i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_byte v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_inreg_v8i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b8 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add <8 x i1> %arg0, %arg0
+ store <8 x i1> %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_inreg_v16i1(<16 x i1> inreg %arg0) {
+; SI-LABEL: amdgpu_cs_inreg_v16i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_inreg_v16i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_short v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_inreg_v16i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b16 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add <16 x i1> %arg0, %arg0
+ store <16 x i1> %add, ptr addrspace(1) undef
+ ret void
+}
+
+define amdgpu_cs void @amdgpu_cs_inreg_v32i1(<32 x i1> inreg %arg0) {
+; SI-LABEL: amdgpu_cs_inreg_v32i1:
+; SI: ; %bb.0:
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: amdgpu_cs_inreg_v32i1:
+; VI: ; %bb.0:
+; VI-NEXT: v_mov_b32_e32 v0, 0
+; VI-NEXT: flat_store_dword v[0:1], v0
+; VI-NEXT: s_endpgm
+;
+; GFX11-LABEL: amdgpu_cs_inreg_v32i1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: global_store_b32 v[0:1], v0, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %add = add <32 x i1> %arg0, %arg0
+ store <32 x i1> %add, ptr addrspace(1) undef
ret void
}
More information about the llvm-commits mailing list