[llvm] [AMDGPU] ISel for @llvm.amdgcn.cs.chain intrinsic (PR #68186)
Nicolai Hähnle via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 23 10:40:56 PDT 2023
================
@@ -30,3 +28,502 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i3
; DAGISEL-GFX10-NEXT: s_endpgm
ret void
}
+
+define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: cs_to_chain_preserve:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
+; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT: ;;#ASMSTART
+; GISEL-GFX11-NEXT: s_nop
+; GISEL-GFX11-NEXT: ;;#ASMEND
+; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
+; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT: s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: cs_to_chain_preserve:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_mov_b32 s100, SCRATCH_RSRC_DWORD0
+; GISEL-GFX10-NEXT: s_mov_b32 s101, SCRATCH_RSRC_DWORD1
+; GISEL-GFX10-NEXT: s_mov_b32 s102, -1
+; GISEL-GFX10-NEXT: s_mov_b32 s103, 0x31c16000
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
+; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
+; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
+; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT: ;;#ASMSTART
+; GISEL-GFX10-NEXT: s_nop
+; GISEL-GFX10-NEXT: ;;#ASMEND
+; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
+; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
+; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
+; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: cs_to_chain_preserve:
+; DAGISEL-GFX11: ; %bb.0:
+; DAGISEL-GFX11-NEXT: s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
+; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT: ;;#ASMSTART
+; DAGISEL-GFX11-NEXT: s_nop
+; DAGISEL-GFX11-NEXT: ;;#ASMEND
+; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
+; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: cs_to_chain_preserve:
+; DAGISEL-GFX10: ; %bb.0:
+; DAGISEL-GFX10-NEXT: s_mov_b32 s100, SCRATCH_RSRC_DWORD0
+; DAGISEL-GFX10-NEXT: s_mov_b32 s101, SCRATCH_RSRC_DWORD1
+; DAGISEL-GFX10-NEXT: s_mov_b32 s102, -1
+; DAGISEL-GFX10-NEXT: s_mov_b32 s103, 0x31c16000
+; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3
+; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
+; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0
+; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT: ;;#ASMSTART
+; DAGISEL-GFX10-NEXT: s_nop
+; DAGISEL-GFX10-NEXT: ;;#ASMEND
+; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
+; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
+; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2
+; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+ call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
+ unreachable
+}
+
+define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: chain_to_chain_preserve:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
+; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT: ;;#ASMSTART
+; GISEL-GFX11-NEXT: s_nop
+; GISEL-GFX11-NEXT: ;;#ASMEND
+; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
+; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT: s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: chain_to_chain_preserve:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
+; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT: ;;#ASMSTART
+; GISEL-GFX10-NEXT: s_nop
+; GISEL-GFX10-NEXT: ;;#ASMEND
+; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
+; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: chain_to_chain_preserve:
+; DAGISEL-GFX11: ; %bb.0:
+; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX11-NEXT: s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
+; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT: ;;#ASMSTART
+; DAGISEL-GFX11-NEXT: s_nop
+; DAGISEL-GFX11-NEXT: ;;#ASMEND
+; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
+; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: chain_to_chain_preserve:
+; DAGISEL-GFX10: ; %bb.0:
+; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
+; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT: ;;#ASMSTART
+; DAGISEL-GFX10-NEXT: s_nop
+; DAGISEL-GFX10-NEXT: ;;#ASMEND
+; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
+; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+ call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
+ unreachable
+}
+
+; FIXME: Preserve things (i.e. v16)!
+; FIXME: Setup s32.
+
+define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
+; GISEL-GFX11-NEXT: s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT: ;;#ASMSTART
+; GISEL-GFX11-NEXT: s_nop
+; GISEL-GFX11-NEXT: ;;#ASMEND
+; GISEL-GFX11-NEXT: s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
+; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT: s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
+; GISEL-GFX10-NEXT: s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT: ;;#ASMSTART
+; GISEL-GFX10-NEXT: s_nop
+; GISEL-GFX10-NEXT: ;;#ASMEND
+; GISEL-GFX10-NEXT: s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
+; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
+; DAGISEL-GFX11: ; %bb.0:
+; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX11-NEXT: s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8
+; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT: ;;#ASMSTART
+; DAGISEL-GFX11-NEXT: s_nop
+; DAGISEL-GFX11-NEXT: ;;#ASMEND
+; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1
+; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
+; DAGISEL-GFX10: ; %bb.0:
+; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8
+; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT: ;;#ASMSTART
+; DAGISEL-GFX10-NEXT: s_nop
+; DAGISEL-GFX10-NEXT: ;;#ASMEND
+; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1
+; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+ call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
----------------
nhaehnle wrote:
Looks like this case isn't actually handled correctly? v16 is clobbered here, but I don't see it being preserved.
https://github.com/llvm/llvm-project/pull/68186
More information about the llvm-commits
mailing list