[llvm] [AMDGPU] add missing checks in processBaseWithConstOffset (PR #102310)
Tim Gymnich via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 08:07:10 PDT 2024
================
@@ -2562,4 +2562,99 @@ entry:
ret void
}
+define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
+; GFX8-LABEL: negativeoffsetnullptr:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s1, s[2:3], 0xec
+; GFX8-NEXT: s_add_u32 s0, 0, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_addc_u32 s1, s1, -1
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX8-NEXT: s_mov_b64 s[0:1], 0
+; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX8-NEXT: .LBB8_1: ; %branch
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: s_and_b64 s[2:3], exec, vcc
+; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX8-NEXT: s_cbranch_execnz .LBB8_1
+; GFX8-NEXT: ; %bb.2: ; %end
+; GFX8-NEXT: s_endpgm
+;
+; GFX9-LABEL: negativeoffsetnullptr:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, -1, 0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX9-NEXT: s_mov_b64 s[0:1], 0
+; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0
+; GFX9-NEXT: .LBB8_1: ; %branch
+; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-NEXT: s_and_b64 s[2:3], exec, vcc
+; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX9-NEXT: s_andn2_b64 exec, exec, s[0:1]
+; GFX9-NEXT: s_cbranch_execnz .LBB8_1
+; GFX9-NEXT: ; %bb.2: ; %end
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: negativeoffsetnullptr:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX10-NEXT: s_add_u32 s0, 0, -1
+; GFX10-NEXT: s_addc_u32 s1, s1, -1
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX10-NEXT: .LBB8_1: ; %branch
+; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-NEXT: s_and_b32 s1, exec_lo, vcc_lo
+; GFX10-NEXT: s_or_b32 s0, s1, s0
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
+; GFX10-NEXT: s_cbranch_execnz .LBB8_1
+; GFX10-NEXT: ; %bb.2: ; %end
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: negativeoffsetnullptr:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_mov_b64 s[0:1], src_private_base
+; GFX11-NEXT: v_add_co_u32 v0, s0, -1, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
+; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: flat_load_u8 v0, v[0:1]
+; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX11-NEXT: .LBB8_1: ; %branch
+; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX11-NEXT: s_and_b32 s1, exec_lo, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_or_b32 s0, s1, s0
+; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0
+; GFX11-NEXT: s_cbranch_execnz .LBB8_1
+; GFX11-NEXT: ; %bb.2: ; %end
+; GFX11-NEXT: s_endpgm
+entry:
+ %null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
----------------
tgymnich wrote:
Good point. That definitely does happen. I'll add a MIR test.
https://github.com/llvm/llvm-project/pull/102310
More information about the llvm-commits mailing list