[llvm] [AMDGPU] ISel for @llvm.amdgcn.cs.chain intrinsic (PR #68186)

Nicolai Hähnle via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 23 10:40:56 PDT 2023


================
@@ -30,3 +28,502 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i3
 ; DAGISEL-GFX10-NEXT:    s_endpgm
   ret void
 }
+
+define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: cs_to_chain_preserve:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
+; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT:    ;;#ASMSTART
+; GISEL-GFX11-NEXT:    s_nop
+; GISEL-GFX11-NEXT:    ;;#ASMEND
+; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
+; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: cs_to_chain_preserve:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_mov_b32 s100, SCRATCH_RSRC_DWORD0
+; GISEL-GFX10-NEXT:    s_mov_b32 s101, SCRATCH_RSRC_DWORD1
+; GISEL-GFX10-NEXT:    s_mov_b32 s102, -1
+; GISEL-GFX10-NEXT:    s_mov_b32 s103, 0x31c16000
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, v0
+; GISEL-GFX10-NEXT:    s_add_u32 s100, s100, s3
+; GISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
+; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT:    ;;#ASMSTART
+; GISEL-GFX10-NEXT:    s_nop
+; GISEL-GFX10-NEXT:    ;;#ASMEND
+; GISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
+; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v1
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v2
+; GISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
+; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: cs_to_chain_preserve:
+; DAGISEL-GFX11:       ; %bb.0:
+; DAGISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
+; DAGISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX11-NEXT:    s_nop
+; DAGISEL-GFX11-NEXT:    ;;#ASMEND
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
+; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: cs_to_chain_preserve:
+; DAGISEL-GFX10:       ; %bb.0:
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s100, SCRATCH_RSRC_DWORD0
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s101, SCRATCH_RSRC_DWORD1
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s102, -1
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s103, 0x31c16000
+; DAGISEL-GFX10-NEXT:    s_add_u32 s100, s100, s3
+; DAGISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
+; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, v0
+; DAGISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX10-NEXT:    s_nop
+; DAGISEL-GFX10-NEXT:    ;;#ASMEND
+; DAGISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
+; DAGISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v1
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v2
+; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
+  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
+  unreachable
+}
+
+define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: chain_to_chain_preserve:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
+; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT:    ;;#ASMSTART
+; GISEL-GFX11-NEXT:    s_nop
+; GISEL-GFX11-NEXT:    ;;#ASMEND
+; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
+; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: chain_to_chain_preserve:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
+; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT:    ;;#ASMSTART
+; GISEL-GFX10-NEXT:    s_nop
+; GISEL-GFX10-NEXT:    ;;#ASMEND
+; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
+; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: chain_to_chain_preserve:
+; DAGISEL-GFX11:       ; %bb.0:
+; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
+; DAGISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX11-NEXT:    s_nop
+; DAGISEL-GFX11-NEXT:    ;;#ASMEND
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
+; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: chain_to_chain_preserve:
+; DAGISEL-GFX10:       ; %bb.0:
+; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
+; DAGISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX10-NEXT:    s_nop
+; DAGISEL-GFX10-NEXT:    ;;#ASMEND
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
+; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
+  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
+  unreachable
+}
+
+; FIXME: Preserve things (i.e. v16)!
+; FIXME: Setup s32.
+
+define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
+; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT:    ;;#ASMSTART
+; GISEL-GFX11-NEXT:    s_nop
+; GISEL-GFX11-NEXT:    ;;#ASMEND
+; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
+; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
+; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT:    ;;#ASMSTART
+; GISEL-GFX10-NEXT:    s_nop
+; GISEL-GFX10-NEXT:    ;;#ASMEND
+; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
+; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
+; DAGISEL-GFX11:       ; %bb.0:
+; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
+; DAGISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX11-NEXT:    s_nop
+; DAGISEL-GFX11-NEXT:    ;;#ASMEND
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
+; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
+; DAGISEL-GFX10:       ; %bb.0:
+; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_preserve_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
+; DAGISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX10-NEXT:    s_nop
+; DAGISEL-GFX10-NEXT:    ;;#ASMEND
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
+; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
+  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
+  unreachable
+}
+
+define amdgpu_cs_chain_preserve void @chain_preserve_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: chain_preserve_to_chain:
+; GISEL-GFX11:       ; %bb.0:
+; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
+; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX11-NEXT:    ;;#ASMSTART
+; GISEL-GFX11-NEXT:    s_nop
+; GISEL-GFX11-NEXT:    ;;#ASMEND
+; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
+; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4
+; GISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12
+; GISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: chain_preserve_to_chain:
+; GISEL-GFX10:       ; %bb.0:
+; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
+; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; GISEL-GFX10-NEXT:    ;;#ASMSTART
+; GISEL-GFX10-NEXT:    s_nop
+; GISEL-GFX10-NEXT:    ;;#ASMEND
+; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
+; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; GISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4
+; GISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12
+; GISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: chain_preserve_to_chain:
+; DAGISEL-GFX11:       ; %bb.0:
+; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX11-NEXT:    s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4
+; DAGISEL-GFX11-NEXT:    s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12
+; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
+; DAGISEL-GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX11-NEXT:    s_nop
+; DAGISEL-GFX11-NEXT:    ;;#ASMEND
+; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
+; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: chain_preserve_to_chain:
+; DAGISEL-GFX10:       ; %bb.0:
+; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[4:5]
+; DAGISEL-GFX10-NEXT:    s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4
+; DAGISEL-GFX10-NEXT:    s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
+; DAGISEL-GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
+; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
+; DAGISEL-GFX10-NEXT:    s_nop
+; DAGISEL-GFX10-NEXT:    ;;#ASMEND
+; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
+; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
+; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
+  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
----------------
nhaehnle wrote:

Same here.

https://github.com/llvm/llvm-project/pull/68186


More information about the llvm-commits mailing list