[llvm] [AMDGPU] Enable GCNRewritePartialRegUses pass by default. (PR #72975)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 03:19:28 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: Valery Pykhtin (vpykhtin)

<details>
<summary>Changes</summary>

Let's try once again after https://github.com/llvm/llvm-project/pull/69957 has landed.

Most of the tests have changed their register numbers, but _GlobalISel/extractelement.i128.ll_ got an additional _s_set_gpr_idx_on/s_set_gpr_idx_off_ pair in the _extractelement_vgpr_v4i128_sgpr_idx_ function.

I had to disable the pass for some tests involving subregisters. I'm not sure whether this should also be done for _spill-vgpr.ll_, as it has lost a lot of spills after this patch; maybe @<!-- -->rampitec knows.

---

Patch is 558.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72975.diff


30 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll (+346-406) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll (+311-310) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (+198-322) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll (+144-144) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll (+77-80) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll (+446-445) 
- (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+39-39) 
- (modified) llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/dead-lane.mir (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll (+32-32) 
- (modified) llvm/test/CodeGen/AMDGPU/idiv-licm.ll (+229-228) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+4) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.mulo.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i1.ll (+531-531) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i32.ll (+17-18) 
- (modified) llvm/test/CodeGen/AMDGPU/load-constant-i8.ll (+69-69) 
- (modified) llvm/test/CodeGen/AMDGPU/load-global-i16.ll (+302-303) 
- (modified) llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+19-19) 
- (modified) llvm/test/CodeGen/AMDGPU/mad_64_32.ll (+26-28) 
- (modified) llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll (+7-7) 
- (modified) llvm/test/CodeGen/AMDGPU/mul.ll (+46-49) 
- (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+218-219) 
- (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+275-275) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/spill-vgpr.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+228-228) 
- (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+137-137) 
- (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+79-79) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 951ed9420594b19..ef8f92a8272fe52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -346,7 +346,7 @@ static cl::opt<bool> EnableMaxIlpSchedStrategy(
 
 static cl::opt<bool> EnableRewritePartialRegUses(
     "amdgpu-enable-rewrite-partial-reg-uses",
-    cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
+    cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
     cl::Hidden);
 
 static cl::opt<bool> EnableHipStdPar(
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index 33a4d3c5494f7c9..d6d7266bbe2afc8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -11,14 +11,13 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GCN-NEXT:    s_mov_b32 s4, s33
 ; GCN-NEXT:    s_add_i32 s33, s32, 0x3fc0
 ; GCN-NEXT:    s_and_b32 s33, s33, 0xffffc000
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
@@ -27,133 +26,115 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
 ; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    v_mov_b32_e32 v6, v2
-; GCN-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off
-; GCN-NEXT:    global_load_dwordx4 v[16:19], v[0:1], off offset:16
-; GCN-NEXT:    global_load_dwordx4 v[56:59], v[0:1], off offset:32
-; GCN-NEXT:    global_load_dwordx4 v[48:51], v[0:1], off offset:48
-; GCN-NEXT:    global_load_dwordx4 v[20:23], v[0:1], off offset:64
-; GCN-NEXT:    global_load_dwordx4 v[44:47], v[0:1], off offset:80
-; GCN-NEXT:    global_load_dwordx4 v[40:43], v[0:1], off offset:96
-; GCN-NEXT:    global_load_dwordx4 v[60:63], v[0:1], off offset:112
-; GCN-NEXT:    global_load_dwordx4 v[36:39], v[0:1], off offset:128
-; GCN-NEXT:    global_load_dwordx4 v[32:35], v[0:1], off offset:144
-; GCN-NEXT:    global_load_dwordx4 v[28:31], v[0:1], off offset:160
-; GCN-NEXT:    global_load_dwordx4 v[52:55], v[0:1], off offset:176
-; GCN-NEXT:    global_load_dwordx4 v[24:27], v[0:1], off offset:192
-; GCN-NEXT:    global_load_dwordx4 v[7:10], v[0:1], off offset:208
+; GCN-NEXT:    global_load_dwordx4 v[3:6], v[0:1], off
+; GCN-NEXT:    global_load_dwordx4 v[7:10], v[0:1], off offset:16
+; GCN-NEXT:    global_load_dwordx4 v[11:14], v[0:1], off offset:32
+; GCN-NEXT:    global_load_dwordx4 v[15:18], v[0:1], off offset:48
+; GCN-NEXT:    global_load_dwordx4 v[19:22], v[0:1], off offset:64
+; GCN-NEXT:    global_load_dwordx4 v[23:26], v[0:1], off offset:80
+; GCN-NEXT:    global_load_dwordx4 v[27:30], v[0:1], off offset:96
+; GCN-NEXT:    global_load_dwordx4 v[31:34], v[0:1], off offset:112
+; GCN-NEXT:    global_load_dwordx4 v[35:38], v[0:1], off offset:128
+; GCN-NEXT:    global_load_dwordx4 v[48:51], v[0:1], off offset:144
+; GCN-NEXT:    global_load_dwordx4 v[52:55], v[0:1], off offset:160
+; GCN-NEXT:    global_load_dwordx4 v[39:42], v[0:1], off offset:176
+; GCN-NEXT:    global_load_dwordx4 v[43:46], v[0:1], off offset:192
+; GCN-NEXT:    global_load_dwordx4 v[56:59], v[0:1], off offset:208
+; GCN-NEXT:    global_load_dwordx4 v[60:63], v[0:1], off offset:224
 ; GCN-NEXT:    s_add_i32 s32, s32, 0x10000
 ; GCN-NEXT:    s_add_i32 s32, s32, 0xffff0000
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
-; GCN-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
-; GCN-NEXT:    global_load_dwordx4 v[8:11], v[0:1], off offset:224
-; GCN-NEXT:    global_load_dwordx4 v[12:15], v[0:1], off offset:240
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
+; GCN-NEXT:    global_load_dwordx4 v[60:63], v[0:1], off offset:240
+; GCN-NEXT:    v_and_b32_e32 v0, 63, v2
 ; GCN-NEXT:    v_lshrrev_b32_e64 v1, 6, s33
-; GCN-NEXT:    v_add_u32_e32 v1, 0x100, v1
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:256
-; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:260
-; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:264
-; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:268
-; GCN-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:272
-; GCN-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:276
-; GCN-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:280
-; GCN-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:284
-; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:288
-; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:292
-; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:296
-; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:300
-; GCN-NEXT:    buffer_store_dword v48, off, s[0:3], s33 offset:304
-; GCN-NEXT:    buffer_store_dword v49, off, s[0:3], s33 offset:308
-; GCN-NEXT:    buffer_store_dword v50, off, s[0:3], s33 offset:312
-; GCN-NEXT:    buffer_store_dword v51, off, s[0:3], s33 offset:316
-; GCN-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:320
-; GCN-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:324
-; GCN-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:328
-; GCN-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:332
-; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:336
-; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:340
-; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:344
-; GCN-NEXT:    buffer_store_dword v47, off, s[0:3], s33 offset:348
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:352
-; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:356
-; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:360
-; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:364
-; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:368
-; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:372
-; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:376
-; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:380
-; GCN-NEXT:    buffer_store_dword v36, off, s[0:3], s33 offset:384
-; GCN-NEXT:    buffer_store_dword v37, off, s[0:3], s33 offset:388
-; GCN-NEXT:    buffer_store_dword v38, off, s[0:3], s33 offset:392
-; GCN-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:396
-; GCN-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:400
-; GCN-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:404
-; GCN-NEXT:    buffer_store_dword v34, off, s[0:3], s33 offset:408
-; GCN-NEXT:    buffer_store_dword v35, off, s[0:3], s33 offset:412
-; GCN-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:416
-; GCN-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:420
-; GCN-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:424
-; GCN-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:428
-; GCN-NEXT:    buffer_store_dword v52, off, s[0:3], s33 offset:432
-; GCN-NEXT:    buffer_store_dword v53, off, s[0:3], s33 offset:436
-; GCN-NEXT:    buffer_store_dword v54, off, s[0:3], s33 offset:440
-; GCN-NEXT:    buffer_store_dword v55, off, s[0:3], s33 offset:444
-; GCN-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:448
-; GCN-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:452
-; GCN-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:456
-; GCN-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:460
-; GCN-NEXT:    buffer_load_dword v16, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v17, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v18, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v19, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v20, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v21, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v22, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v23, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v24, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v25, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v26, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v27, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v28, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v29, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload
-; GCN-NEXT:    v_and_b32_e32 v0, 63, v6
 ; GCN-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GCN-NEXT:    v_add_u32_e32 v1, 0x100, v1
 ; GCN-NEXT:    v_add_u32_e32 v0, v1, v0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v16, v20
-; GCN-NEXT:    v_mov_b32_e32 v17, v21
-; GCN-NEXT:    v_mov_b32_e32 v18, v22
-; GCN-NEXT:    v_mov_b32_e32 v19, v23
-; GCN-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:464
-; GCN-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:468
-; GCN-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:472
-; GCN-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:476
-; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:480
-; GCN-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:484
-; GCN-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:488
-; GCN-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:492
-; GCN-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:496
-; GCN-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:500
-; GCN-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:504
-; GCN-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:508
+; GCN-NEXT:    buffer_store_dword v60, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v61, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v62, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v63, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
+; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:256
+; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:260
+; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:264
+; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:268
+; GCN-NEXT:    buffer_store_dword v7, off, s[0:3], s33 offset:272
+; GCN-NEXT:    buffer_store_dword v8, off, s[0:3], s33 offset:276
+; GCN-NEXT:    buffer_store_dword v9, off, s[0:3], s33 offset:280
+; GCN-NEXT:    buffer_store_dword v10, off, s[0:3], s33 offset:284
+; GCN-NEXT:    buffer_store_dword v11, off, s[0:3], s33 offset:288
+; GCN-NEXT:    buffer_store_dword v12, off, s[0:3], s33 offset:292
+; GCN-NEXT:    buffer_store_dword v13, off, s[0:3], s33 offset:296
+; GCN-NEXT:    buffer_store_dword v14, off, s[0:3], s33 offset:300
+; GCN-NEXT:    buffer_store_dword v15, off, s[0:3], s33 offset:304
+; GCN-NEXT:    buffer_store_dword v16, off, s[0:3], s33 offset:308
+; GCN-NEXT:    buffer_store_dword v17, off, s[0:3], s33 offset:312
+; GCN-NEXT:    buffer_store_dword v18, off, s[0:3], s33 offset:316
+; GCN-NEXT:    buffer_store_dword v19, off, s[0:3], s33 offset:320
+; GCN-NEXT:    buffer_store_dword v20, off, s[0:3], s33 offset:324
+; GCN-NEXT:    buffer_store_dword v21, off, s[0:3], s33 offset:328
+; GCN-NEXT:    buffer_store_dword v22, off, s[0:3], s33 offset:332
+; GCN-NEXT:    buffer_store_dword v23, off, s[0:3], s33 offset:336
+; GCN-NEXT:    buffer_store_dword v24, off, s[0:3], s33 offset:340
+; GCN-NEXT:    buffer_store_dword v25, off, s[0:3], s33 offset:344
+; GCN-NEXT:    buffer_store_dword v26, off, s[0:3], s33 offset:348
+; GCN-NEXT:    buffer_store_dword v27, off, s[0:3], s33 offset:352
+; GCN-NEXT:    buffer_store_dword v28, off, s[0:3], s33 offset:356
+; GCN-NEXT:    buffer_store_dword v29, off, s[0:3], s33 offset:360
+; GCN-NEXT:    buffer_store_dword v30, off, s[0:3], s33 offset:364
+; GCN-NEXT:    buffer_store_dword v31, off, s[0:3], s33 offset:368
+; GCN-NEXT:    buffer_store_dword v32, off, s[0:3], s33 offset:372
+; GCN-NEXT:    buffer_store_dword v33, off, s[0:3], s33 offset:376
+; GCN-NEXT:    buffer_store_dword v34, off, s[0:3], s33 offset:380
+; GCN-NEXT:    buffer_store_dword v35, off, s[0:3], s33 offset:384
+; GCN-NEXT:    buffer_store_dword v36, off, s[0:3], s33 offset:388
+; GCN-NEXT:    buffer_store_dword v37, off, s[0:3], s33 offset:392
+; GCN-NEXT:    buffer_store_dword v38, off, s[0:3], s33 offset:396
+; GCN-NEXT:    buffer_store_dword v48, off, s[0:3], s33 offset:400
+; GCN-NEXT:    buffer_store_dword v49, off, s[0:3], s33 offset:404
+; GCN-NEXT:    buffer_store_dword v50, off, s[0:3], s33 offset:408
+; GCN-NEXT:    buffer_store_dword v51, off, s[0:3], s33 offset:412
+; GCN-NEXT:    buffer_store_dword v52, off, s[0:3], s33 offset:416
+; GCN-NEXT:    buffer_store_dword v53, off, s[0:3], s33 offset:420
+; GCN-NEXT:    buffer_store_dword v54, off, s[0:3], s33 offset:424
+; GCN-NEXT:    buffer_store_dword v55, off, s[0:3], s33 offset:428
+; GCN-NEXT:    buffer_store_dword v39, off, s[0:3], s33 offset:432
+; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:436
+; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:440
+; GCN-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:444
+; GCN-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:448
+; GCN-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:452
+; GCN-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:456
+; GCN-NEXT:    buffer_store_dword v46, off, s[0:3], s33 offset:460
+; GCN-NEXT:    buffer_store_dword v56, off, s[0:3], s33 offset:464
+; GCN-NEXT:    buffer_store_dword v57, off, s[0:3], s33 offset:468
+; GCN-NEXT:    buffer_store_dword v58, off, s[0:3], s33 offset:472
+; GCN-NEXT:    buffer_store_dword v59, off, s[0:3], s33 offset:476
+; GCN-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:480
+; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:484
+; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:488
+; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:492
+; GCN-NEXT:    buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_store_dword v3, off, s[0:3], s33 offset:496
+; GCN-NEXT:    buffer_store_dword v4, off, s[0:3], s33 offset:500
+; GCN-NEXT:    buffer_store_dword v5, off, s[0:3], s33 offset:504
+; GCN-NEXT:    buffer_store_dword v6, off, s[0:3], s33 offset:508
 ; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
 ; GCN-NEXT:    buffer_load_dword v63, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v62, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
@@ -163,14 +144,13 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) {
 ; GCN-NEXT:    buffer_load_dword v58, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v57, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
 ; GCN-NEXT:    buffer_load_dword v56, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v47, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GCN-NEXT:    buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GCN-NEXT:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/72975


More information about the llvm-commits mailing list