[llvm] [AMDGPU][Scheduler] Refactor ArchVGPR rematerialization during scheduling (PR #125885)
Lucas Ramirez via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 09:03:54 PDT 2025
================
@@ -444,65 +444,77 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) inreg %src, ptr addrspa
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, 0x2000
-; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, s16
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
-; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s0, v1
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v62, s[8:11], 0 offen
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v62, s[8:11], 0 offen offset:16
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:32
-; GISEL-GFX942-NEXT: v_add_u32_e32 v63, s12, v1
-; GISEL-GFX942-NEXT: v_add_u32_e32 v1, 0x100, v1
-; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
+; GISEL-GFX942-NEXT: v_add_u32_e32 v1, s0, v0
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[2:5], v1, s[8:11], 0 offen
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[6:9], v1, s[8:11], 0 offen offset:16
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:32
+; GISEL-GFX942-NEXT: v_add_u32_e32 v62, s12, v0
+; GISEL-GFX942-NEXT: v_add_u32_e32 v0, 0x100, v0
; GISEL-GFX942-NEXT: s_waitcnt vmcnt(0)
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a0, v13 ; Reload Reuse
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a1, v12 ; Reload Reuse
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a2, v11 ; Reload Reuse
; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a3, v10 ; Reload Reuse
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v62, s[8:11], 0 offen offset:48
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v62, s[8:11], 0 offen offset:64
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v62, s[8:11], 0 offen offset:80
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v62, s[8:11], 0 offen offset:96
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v62, s[8:11], 0 offen offset:112
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v62, s[8:11], 0 offen offset:128
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v62, s[8:11], 0 offen offset:144
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v62, s[8:11], 0 offen offset:160
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v62, s[8:11], 0 offen offset:176
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v62, s[8:11], 0 offen offset:192
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v62, s[8:11], 0 offen offset:208
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v62, s[8:11], 0 offen offset:224
-; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v62, s[8:11], 0 offen offset:240
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[14:17], v1, s[8:11], 0 offen offset:48
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[18:21], v1, s[8:11], 0 offen offset:64
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[22:25], v1, s[8:11], 0 offen offset:80
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[26:29], v1, s[8:11], 0 offen offset:96
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[30:33], v1, s[8:11], 0 offen offset:112
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[34:37], v1, s[8:11], 0 offen offset:128
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[38:41], v1, s[8:11], 0 offen offset:144
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[42:45], v1, s[8:11], 0 offen offset:160
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[46:49], v1, s[8:11], 0 offen offset:176
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[50:53], v1, s[8:11], 0 offen offset:192
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[54:57], v1, s[8:11], 0 offen offset:208
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[58:61], v1, s[8:11], 0 offen offset:224
+; GISEL-GFX942-NEXT: buffer_load_dwordx4 v[10:13], v1, s[8:11], 0 offen offset:240
; GISEL-GFX942-NEXT: s_nop 0
-; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v63, s[4:7], 0 offen
-; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v63, s[4:7], 0 offen offset:16
-; GISEL-GFX942-NEXT: s_waitcnt vmcnt(2)
-; GISEL-GFX942-NEXT: v_accvgpr_write_b32 a4, v13 ; Reload Reuse
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[6:9], v62, s[4:7], 0 offen offset:16
+; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, 0x2000
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v5, a0 ; Reload Reuse
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v4, a1 ; Reload Reuse
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v3, a2 ; Reload Reuse
; GISEL-GFX942-NEXT: v_accvgpr_read_b32 v2, a3 ; Reload Reuse
+; GISEL-GFX942-NEXT: v_cmp_lt_u32_e32 vcc, v0, v1
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[2:5], v62, s[4:7], 0 offen offset:32
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[14:17], v62, s[4:7], 0 offen offset:48
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[18:21], v62, s[4:7], 0 offen offset:64
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[22:25], v62, s[4:7], 0 offen offset:80
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[26:29], v62, s[4:7], 0 offen offset:96
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[30:33], v62, s[4:7], 0 offen offset:112
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[34:37], v62, s[4:7], 0 offen offset:128
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[38:41], v62, s[4:7], 0 offen offset:144
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[42:45], v62, s[4:7], 0 offen offset:160
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[46:49], v62, s[4:7], 0 offen offset:176
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[50:53], v62, s[4:7], 0 offen offset:192
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[54:57], v62, s[4:7], 0 offen offset:208
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
+; GISEL-GFX942-NEXT: buffer_store_dwordx4 v[58:61], v62, s[4:7], 0 offen offset:224
+; GISEL-GFX942-NEXT: s_waitcnt vmcnt(15)
----------------
lucas-rami wrote:
Looking at that unit test again, I struggle to see why all those s_waitcnt's appeared. Is this normal?
https://github.com/llvm/llvm-project/pull/125885
More information about the llvm-commits
mailing list