[llvm] [AMDGPU] Extend llvm.amdgcn.set.inactive intrinsic to support Reg32/Reg64 types (PR #94457)

Vikram Hegde via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 12 04:38:07 PDT 2024


================
@@ -93,6 +93,234 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
   ret void
 }
 
+; Exercises llvm.amdgcn.set.inactive with a 32-bit float operand: %in is the
+; first operand and the constant 3.0 is the second. The result is stored to %out.
+; In the expected output, 3.0 appears as the single-precision bit pattern
+; 0x40400000 and is written to v0 between the two `s_not_b64 exec, exec`
+; instructions, after v0 was first loaded from s3.
+define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
+; GCN-LABEL: set_inactive_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dword s3, s[0:1], 0x2c
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v1, 0x40400000
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s3
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v0, v1
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+  %tmp = call float @llvm.amdgcn.set.inactive.f32(float %in, float 3.0) #0
+  store float %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+; Exercises llvm.amdgcn.set.inactive with a 64-bit double operand: %in is the
+; first operand and the constant 4.2 is the second. The result is stored to %out.
+; In the expected output, 4.2 appears as two 32-bit halves (0xcccccccd low,
+; 0x4010cccc high), and the 64-bit value is handled as two 32-bit moves
+; (v0 and v1) between the two `s_not_b64 exec, exec` instructions.
+define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
+; GCN-LABEL: set_inactive_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GCN-NEXT:    s_mov_b32 s4, 0xcccccccd
+; GCN-NEXT:    s_mov_b32 s5, 0x4010cccc
+; GCN-NEXT:    v_mov_b32_e32 v2, s4
+; GCN-NEXT:    v_mov_b32_e32 v3, s5
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s2
+; GCN-NEXT:    v_mov_b32_e32 v1, s3
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v0, v2
+; GCN-NEXT:    v_mov_b32_e32 v1, v3
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+  %tmp = call double @llvm.amdgcn.set.inactive.f64(double %in, double 4.2) #0
+  store double %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+; Exercises llvm.amdgcn.set.inactive with a <2 x i16> operand: %in is the first
+; operand and the constant vector <1, 1> is the second. The result is stored to
+; %out. In the expected output, the constant vector appears as the single
+; 32-bit immediate 0x10001 and is written to v0 with one 32-bit move between
+; the two `s_not_b64 exec, exec` instructions.
+define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) {
+; GCN-LABEL: set_inactive_v2i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dword s3, s[0:1], 0x2c
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v1, 0x10001
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s3
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v0, v1
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+  %tmp = call <2 x i16> @llvm.amdgcn.set.inactive.v2i16(<2 x i16> %in, <2 x i16> <i16 1, i16 1>) #0
+  store <2 x i16> %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
----------------
vikramRH wrote:

Added tests for v4i16, v4f16, v4bf16, and v2bf16.

https://github.com/llvm/llvm-project/pull/94457


More information about the llvm-commits mailing list