[llvm] a44d740 - [AMDGPU][NFC] Pre-commit test for PR #94133

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 5 04:56:49 PDT 2024


Author: Carl Ritson
Date: 2024-06-05T20:56:07+09:00
New Revision: a44d7406f45fd3e5af45de116aed03b0bf7a881f

URL: https://github.com/llvm/llvm-project/commit/a44d7406f45fd3e5af45de116aed03b0bf7a881f
DIFF: https://github.com/llvm/llvm-project/commit/a44d7406f45fd3e5af45de116aed03b0bf7a881f.diff

LOG: [AMDGPU][NFC] Pre-commit test for PR #94133

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/wqm.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 95dfb12c8dbae..6fcf5067b0225 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -2936,6 +2936,89 @@ ENDIF:
   ret float %r
 }
 
+; The WQM -> StrictWQM transition must be preserved because the kill breaks the WQM mask.
+define amdgpu_ps float @test_strict_wqm_within_wqm_with_kill(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %c, i32 %z, float %data, i32 %wqm_data) {
+; GFX9-W64-LABEL: test_strict_wqm_within_wqm_with_kill:
+; GFX9-W64:       ; %bb.0: ; %main_body
+; GFX9-W64-NEXT:    s_mov_b64 s[12:13], exec
+; GFX9-W64-NEXT:    s_mov_b64 s[14:15], exec
+; GFX9-W64-NEXT:    s_wqm_b64 exec, exec
+; GFX9-W64-NEXT:    v_mov_b32_e32 v3, v2
+; GFX9-W64-NEXT:    s_mov_b64 exec, s[14:15]
+; GFX9-W64-NEXT:    s_wqm_b64 exec, exec
+; GFX9-W64-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
+; GFX9-W64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-W64-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
+; GFX9-W64-NEXT:    s_xor_b64 s[0:1], vcc, exec
+; GFX9-W64-NEXT:    s_andn2_b64 s[12:13], s[12:13], s[0:1]
+; GFX9-W64-NEXT:    s_cbranch_scc0 .LBB51_2
+; GFX9-W64-NEXT:  ; %bb.1: ; %main_body
+; GFX9-W64-NEXT:    s_and_b64 exec, exec, vcc
+; GFX9-W64-NEXT:    ds_swizzle_b32 v3, v3 offset:swizzle(SWAP,2)
+; GFX9-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-W64-NEXT:    v_mov_b32_e32 v1, v3
+; GFX9-W64-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX9-W64-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-W64-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX9-W64-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $exec
+; GFX9-W64-NEXT:    s_and_b64 exec, exec, s[12:13]
+; GFX9-W64-NEXT:    s_branch .LBB51_3
+; GFX9-W64-NEXT:  .LBB51_2:
+; GFX9-W64-NEXT:    s_mov_b64 exec, 0
+; GFX9-W64-NEXT:    exp null off, off, off, off done vm
+; GFX9-W64-NEXT:    s_endpgm
+; GFX9-W64-NEXT:  .LBB51_3:
+;
+; GFX10-W32-LABEL: test_strict_wqm_within_wqm_with_kill:
+; GFX10-W32:       ; %bb.0: ; %main_body
+; GFX10-W32-NEXT:    s_mov_b32 s12, exec_lo
+; GFX10-W32-NEXT:    s_mov_b32 s13, exec_lo
+; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-W32-NEXT:    v_mov_b32_e32 v3, v2
+; GFX10-W32-NEXT:    s_mov_b32 exec_lo, s13
+; GFX10-W32-NEXT:    s_wqm_b32 exec_lo, exec_lo
+; GFX10-W32-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-W32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-W32-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX10-W32-NEXT:    s_xor_b32 s0, vcc_lo, exec_lo
+; GFX10-W32-NEXT:    s_andn2_b32 s12, s12, s0
+; GFX10-W32-NEXT:    s_cbranch_scc0 .LBB51_2
+; GFX10-W32-NEXT:  ; %bb.1: ; %main_body
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, vcc_lo
+; GFX10-W32-NEXT:    ds_swizzle_b32 v3, v3 offset:swizzle(SWAP,2)
+; GFX10-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-W32-NEXT:    v_mov_b32_e32 v1, v3
+; GFX10-W32-NEXT:    v_cvt_f32_i32_e32 v1, v1
+; GFX10-W32-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-W32-NEXT:    v_add_f32_e32 v0, v0, v1
+; GFX10-W32-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $exec
+; GFX10-W32-NEXT:    s_and_b32 exec_lo, exec_lo, s12
+; GFX10-W32-NEXT:    s_branch .LBB51_3
+; GFX10-W32-NEXT:  .LBB51_2:
+; GFX10-W32-NEXT:    s_mov_b32 exec_lo, 0
+; GFX10-W32-NEXT:    exp null off, off, off, off done vm
+; GFX10-W32-NEXT:    s_endpgm
+; GFX10-W32-NEXT:  .LBB51_3:
+main_body:
+  %c.bc = bitcast i32 %c to float
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
+  %tex0 = extractelement <4 x float> %tex, i32 0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tex0, <8 x i32> %rsrc, <4 x i32> %sampler, i1 false, i32 0, i32 0) #0
+  %cmp = icmp eq i32 %z, 0
+  call void @llvm.amdgcn.kill(i1 %cmp)
+  %dataf = extractelement <4 x float> %dtex, i32 0
+  %data2 = call i32 @llvm.amdgcn.ds.swizzle(i32 %wqm_data, i32 2079)
+  %data3 = call i32 @llvm.amdgcn.strict.wqm.i32(i32 %data2)
+  %data3f = sitofp i32 %data3 to float
+  %result.f = fadd float %dataf, %data3f
+  %result.i = bitcast float %result.f to i32
+  %result.wqm = call i32 @llvm.amdgcn.wqm.i32(i32 %result.i)
+  %result = bitcast i32 %result.wqm to float
+  ret float %result
+}
+
 ;TODO: StrictWQM -> WQM transition could be improved. WQM could use the exec from the previous state instead of calling s_wqm again.
 define amdgpu_ps float @test_strict_wqm_strict_wwm_wqm(i32 inreg %idx0, i32 inreg %idx1, ptr addrspace(8) inreg %res, ptr addrspace(8) inreg %res2, float %inp, <8 x i32> inreg %res3) {
 ; GFX9-W64-LABEL: test_strict_wqm_strict_wwm_wqm:
@@ -3281,9 +3364,9 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
 ; GFX9-W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-W64-NEXT:    v_cmp_le_f32_e64 vcc, s0, 0
 ; GFX9-W64-NEXT:    s_andn2_b64 s[4:5], exec, vcc
-; GFX9-W64-NEXT:    s_cbranch_scc0 .LBB54_1
+; GFX9-W64-NEXT:    s_cbranch_scc0 .LBB55_1
 ; GFX9-W64-NEXT:    s_endpgm
-; GFX9-W64-NEXT:  .LBB54_1:
+; GFX9-W64-NEXT:  .LBB55_1:
 ; GFX9-W64-NEXT:    s_mov_b64 exec, 0
 ; GFX9-W64-NEXT:    exp null off, off, off, off done vm
 ; GFX9-W64-NEXT:    s_endpgm
@@ -3297,9 +3380,9 @@ define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(ptr addrspace(6) in
 ; GFX10-W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-W32-NEXT:    v_cmp_le_f32_e64 vcc_lo, s0, 0
 ; GFX10-W32-NEXT:    s_andn2_b32 s4, exec_lo, vcc_lo
-; GFX10-W32-NEXT:    s_cbranch_scc0 .LBB54_1
+; GFX10-W32-NEXT:    s_cbranch_scc0 .LBB55_1
 ; GFX10-W32-NEXT:    s_endpgm
-; GFX10-W32-NEXT:  .LBB54_1:
+; GFX10-W32-NEXT:  .LBB55_1:
 ; GFX10-W32-NEXT:    s_mov_b32 exec_lo, 0
 ; GFX10-W32-NEXT:    exp null off, off, off, off done vm
 ; GFX10-W32-NEXT:    s_endpgm


        


More information about the llvm-commits mailing list