[llvm] a44d740 - [AMDGPU][NFC] Pre-commit test for PR #94133
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 5 04:56:49 PDT 2024
Author: Carl Ritson
Date: 2024-06-05T20:56:07+09:00
New Revision: a44d7406f45fd3e5af45de116aed03b0bf7a881f
URL: https://github.com/llvm/llvm-project/commit/a44d7406f45fd3e5af45de116aed03b0bf7a881f
DIFF: https://github.com/llvm/llvm-project/commit/a44d7406f45fd3e5af45de116aed03b0bf7a881f.diff
LOG: [AMDGPU][NFC] Pre-commit test for PR #94133
Added:
Modified:
llvm/test/CodeGen/AMDGPU/wqm.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 95dfb12c8dbae..6fcf5067b0225 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -2936,6 +2936,89 @@ ENDIF:
ret float %r
}
+; WQM -> StrictWQM transition must be preserved because the kill breaks the WQM mask (pre-commit test for PR #94133).
+define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data, i32 %wqm_data) {
+; GFX9-W64-LABEL: test_strict_wqm_within_wqm_with_kill:
+; GFX9-W64: ; %bb.0: ; %main_body
+; GFX9-W64-NEXT: s_mov_b64 s[12:13], exec
+; GFX9-W64-NEXT: s_mov_b64 s[14:15], exec
+; GFX9-W64-NEXT: s_wqm_b64 exec, exec
+; GFX9-W64-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-W64-NEXT: s_mov_b64 exec, s[14:15]
+; GFX9-W64-NEXT: s_wqm_b64 exec, exec
+; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
+; GFX9-W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
+; GFX9-W64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
+; GFX9-W64-NEXT: s_xor_b64 s[0:1], vcc, exec
+; GFX9-W64-NEXT: s_andn2_b64 s[12:13], s[12:13], s[0:1]
+; GFX9-W64-NEXT: s_cbranch_scc0 .LBB51_2
+; GFX9-W64-NEXT: ; %bb.1: ; %main_body
+; GFX9-W64-NEXT: s_and_b64 exec, exec, vcc
+; GFX9-W64-NEXT: ds_swizzle_b32 v3, v3 offset:swizzle(SWAP,2)
+; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-W64-NEXT: v_mov_b32_e32 v1, v3
+; GFX9-W64-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX9-W64-NEXT: s_waitcnt vmcnt(0)
+; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
+; GFX9-W64-NEXT: s_and_b64 exec, exec, s[12:13]
+; GFX9-W64-NEXT: s_branch .LBB51_3
+; GFX9-W64-NEXT: .LBB51_2:
+; GFX9-W64-NEXT: s_mov_b64 exec, 0
+; GFX9-W64-NEXT: exp null off, off, off, off done vm
+; GFX9-W64-NEXT: s_endpgm
+; GFX9-W64-NEXT: .LBB51_3:
+;
+; GFX10-W32-LABEL: test_strict_wqm_within_wqm_with_kill:
+; GFX10-W32: ; %bb.0: ; %main_body
+; GFX10-W32-NEXT: s_mov_b32 s12, exec_lo
+; GFX10-W32-NEXT: s_mov_b32 s13, exec_lo
+; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-W32-NEXT: v_mov_b32_e32 v3, v2
+; GFX10-W32-NEXT: s_mov_b32 exec_lo, s13
+; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo
+; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
+; GFX10-W32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-W32-NEXT: s_xor_b32 s0, vcc_lo, exec_lo
+; GFX10-W32-NEXT: s_andn2_b32 s12, s12, s0
+; GFX10-W32-NEXT: s_cbranch_scc0 .LBB51_2
+; GFX10-W32-NEXT: ; %bb.1: ; %main_body
+; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-W32-NEXT: ds_swizzle_b32 v3, v3 offset:swizzle(SWAP,2)
+; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-W32-NEXT: v_mov_b32_e32 v1, v3
+; GFX10-W32-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX10-W32-NEXT: s_waitcnt vmcnt(0)
+; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec
+; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s12
+; GFX10-W32-NEXT: s_branch .LBB51_3
+; GFX10-W32-NEXT: .LBB51_2:
+; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
+; GFX10-W32-NEXT: exp null off, off, off, off done vm
+; GFX10-W32-NEXT: s_endpgm
+; GFX10-W32-NEXT: .LBB51_3:
+main_body:
+ %c.bc = bitcast i32 %c to float
+ %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 ; first sample: coordinate is %c reinterpreted as float
+ %tex0 = extractelement <4 x float> %tex, i32 0
+ %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0 ; dependent sample: coordinate is element 0 of %tex
+ %cmp = icmp eq i32 %z, 0
+ call void @llvm.amdgcn.kill(i1 %cmp) ; kill is issued after both samples and before the strict.wqm value is produced
+ %dataf = extractelement <4 x float> %dtex, i32 0
+ %data2 = call i32 @llvm.amdgcn.ds.swizzle(i32 %wqm_data, i32 2079) ; pattern 2079 is printed as swizzle(SWAP,2) in the expected output above
+ %data3 = call i32 @llvm.amdgcn.strict.wqm.i32(i32 %data2) ; the swizzled value is wrapped in strict.wqm
+ %data3f = sitofp i32 %data3 to float
+ %result.f = fadd float %dataf, %data3f
+ %result.i = bitcast float %result.f to i32
+ %result.wqm = call i32 @llvm.amdgcn.wqm.i32(i32 %result.i) ; the combined result is wrapped in wqm
+ %result = bitcast i32 %result.wqm to float
+ ret float %result
+}
+
;TODO: StrictWQM -> WQM transition could be improved. WQM could use the exec from the previous state instead of calling s_wqm again.
define amdgpu_ps float @test_strict_wqm_strict_wwm_wqm(i32 inreg %idx0, i32 inreg %idx1, ptr addrspace(8) inreg %res, ptr addrspace(8) inreg %res2, float %inp, <8 x i32> inreg %res3) {
; GFX9-W64-LABEL: test_strict_wqm_strict_wwm_wqm:
@@ -3281,9 +3364,9 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
; GFX9-W64-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-W64-NEXT: v_cmp_le_f32_e64 vcc, s0, 0
; GFX9-W64-NEXT: s_andn2_b64 s[4:5], exec, vcc
-; GFX9-W64-NEXT: s_cbranch_scc0 .LBB54_1
+; GFX9-W64-NEXT: s_cbranch_scc0 .LBB55_1
; GFX9-W64-NEXT: s_endpgm
-; GFX9-W64-NEXT: .LBB54_1:
+; GFX9-W64-NEXT: .LBB55_1:
; GFX9-W64-NEXT: s_mov_b64 exec, 0
; GFX9-W64-NEXT: exp null off, off, off, off done vm
; GFX9-W64-NEXT: s_endpgm
@@ -3297,9 +3380,9 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
; GFX10-W32-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-W32-NEXT: v_cmp_le_f32_e64 vcc_lo, s0, 0
; GFX10-W32-NEXT: s_andn2_b32 s4, exec_lo, vcc_lo
-; GFX10-W32-NEXT: s_cbranch_scc0 .LBB54_1
+; GFX10-W32-NEXT: s_cbranch_scc0 .LBB55_1
; GFX10-W32-NEXT: s_endpgm
-; GFX10-W32-NEXT: .LBB54_1:
+; GFX10-W32-NEXT: .LBB55_1:
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-W32-NEXT: exp null off, off, off, off done vm
; GFX10-W32-NEXT: s_endpgm
More information about the llvm-commits mailing list