[llvm] [AMDGPU] New test for untested line in AMDGPUMarkLastScratchLoad (PR #185430)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 07:34:37 PDT 2026
https://github.com/ambergorzynski created https://github.com/llvm/llvm-project/pull/185430
[This line](https://github.com/llvm/llvm-project/blob/af15474262100ade9a8fcfd05f9e05c7ba23ff8c/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp#L121) in the AMDGPU backend is not covered by the existing test suite (checked using coverage, and by confirming that no test in the existing test suite fails if we insert an `abort()` at this line).
We propose a test that covers this line. We demonstrate the test by inserting an `abort()` at that line in commit [#4001f0e](https://github.com/llvm/llvm-project/commit/4001f0e8a8342a38125e2b95c1a6c3738b4a99ee). Running all tests shows that only our proposed test fails in the presence of the abort. We'll remove the abort before merging.
>From 5335c273ebbfadc2d8907b71365589f3ffd8b938 Mon Sep 17 00:00:00 2001
From: agorzyns <amber.gorzynski at amd.com>
Date: Mon, 9 Mar 2026 09:29:13 -0500
Subject: [PATCH 1/2] [AMDGPU] Test uncovered line 121 in
AMDGPUMarkLastScratchLoad.cpp
---
.../CodeGen/AMDGPU/mark-last-scratch-load.ll | 306 ++++++++++++++++++
1 file changed, 306 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/mark-last-scratch-load.ll
diff --git a/llvm/test/CodeGen/AMDGPU/mark-last-scratch-load.ll b/llvm/test/CodeGen/AMDGPU/mark-last-scratch-load.ll
new file mode 100644
index 0000000000000..f4a08d9bd7958
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mark-last-scratch-load.ll
@@ -0,0 +1,306 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1201 -o - %s | FileCheck %s
+
+declare i32 @foo()
+declare <8 x half> @bar(<32 x i64>)
+
+define <8 x half> @baz() {
+; CHECK-LABEL: baz:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
+; CHECK-NEXT: s_wait_expcnt 0x0
+; CHECK-NEXT: s_wait_samplecnt 0x0
+; CHECK-NEXT: s_wait_bvhcnt 0x0
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_mov_b32 s0, s33
+; CHECK-NEXT: s_mov_b32 s33, s32
+; CHECK-NEXT: s_or_saveexec_b32 s1, -1
+; CHECK-NEXT: scratch_store_b32 off, v93, s33 offset:404 ; 4-byte Folded Spill
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_mov_b32 exec_lo, s1
+; CHECK-NEXT: v_mov_b32_e32 v0, 0x60
+; CHECK-NEXT: s_clause 0x1f ; 128-byte Folded Spill
+; CHECK-NEXT: scratch_store_b32 off, v40, s33 offset:144
+; CHECK-NEXT: scratch_store_b32 off, v41, s33 offset:140
+; CHECK-NEXT: scratch_store_b32 off, v42, s33 offset:136
+; CHECK-NEXT: scratch_store_b32 off, v43, s33 offset:132
+; CHECK-NEXT: scratch_store_b32 off, v44, s33 offset:128
+; CHECK-NEXT: scratch_store_b32 off, v45, s33 offset:124
+; CHECK-NEXT: scratch_store_b32 off, v46, s33 offset:120
+; CHECK-NEXT: scratch_store_b32 off, v47, s33 offset:116
+; CHECK-NEXT: scratch_store_b32 off, v56, s33 offset:112
+; CHECK-NEXT: scratch_store_b32 off, v57, s33 offset:108
+; CHECK-NEXT: scratch_store_b32 off, v58, s33 offset:104
+; CHECK-NEXT: scratch_store_b32 off, v59, s33 offset:100
+; CHECK-NEXT: scratch_store_b32 off, v60, s33 offset:96
+; CHECK-NEXT: scratch_store_b32 off, v61, s33 offset:92
+; CHECK-NEXT: scratch_store_b32 off, v62, s33 offset:88
+; CHECK-NEXT: scratch_store_b32 off, v63, s33 offset:84
+; CHECK-NEXT: scratch_store_b32 off, v72, s33 offset:80
+; CHECK-NEXT: scratch_store_b32 off, v73, s33 offset:76
+; CHECK-NEXT: scratch_store_b32 off, v74, s33 offset:72
+; CHECK-NEXT: scratch_store_b32 off, v75, s33 offset:68
+; CHECK-NEXT: scratch_store_b32 off, v76, s33 offset:64
+; CHECK-NEXT: scratch_store_b32 off, v77, s33 offset:60
+; CHECK-NEXT: scratch_store_b32 off, v78, s33 offset:56
+; CHECK-NEXT: scratch_store_b32 off, v79, s33 offset:52
+; CHECK-NEXT: scratch_store_b32 off, v88, s33 offset:48
+; CHECK-NEXT: scratch_store_b32 off, v89, s33 offset:44
+; CHECK-NEXT: scratch_store_b32 off, v90, s33 offset:40
+; CHECK-NEXT: scratch_store_b32 off, v91, s33 offset:36
+; CHECK-NEXT: scratch_store_b32 off, v92, s33 offset:32
+; CHECK-NEXT: scratch_store_b32 off, v104, s33 offset:28
+; CHECK-NEXT: scratch_store_b32 off, v105, s33 offset:24
+; CHECK-NEXT: scratch_store_b32 off, v106, s33 offset:20
+; CHECK-NEXT: s_clause 0x4 ; 20-byte Folded Spill
+; CHECK-NEXT: scratch_store_b32 off, v107, s33 offset:16
+; CHECK-NEXT: scratch_store_b32 off, v108, s33 offset:12
+; CHECK-NEXT: scratch_store_b32 off, v109, s33 offset:8
+; CHECK-NEXT: scratch_store_b32 off, v110, s33 offset:4
+; CHECK-NEXT: scratch_store_b32 off, v111, s33
+; CHECK-NEXT: v_dual_mov_b32 v92, v31 :: v_dual_mov_b32 v1, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x50
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: s_clause 0x1
+; CHECK-NEXT: global_load_b128 v[56:59], v[0:1], off
+; CHECK-NEXT: global_load_b128 v[104:107], v[2:3], off
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: v_dual_mov_b32 v6, 48 :: v_dual_mov_b32 v1, 0
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x70
+; CHECK-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v8, 0x80
+; CHECK-NEXT: v_mov_b32_e32 v9, 0
+; CHECK-NEXT: s_clause 0x4
+; CHECK-NEXT: global_load_b128 v[72:75], v[0:1], off
+; CHECK-NEXT: global_load_b128 v[10:13], v[2:3], off
+; CHECK-NEXT: global_load_b128 v[14:17], v[8:9], off
+; CHECK-NEXT: global_load_b128 v[18:21], v[8:9], off offset:16
+; CHECK-NEXT: global_load_b128 v[22:25], v[8:9], off offset:32
+; CHECK-NEXT: v_dual_mov_b32 v4, 64 :: v_dual_mov_b32 v7, 0
+; CHECK-NEXT: v_mov_b32_e32 v5, 0
+; CHECK-NEXT: s_clause 0x1
+; CHECK-NEXT: global_load_b128 v[108:111], v[4:5], off
+; CHECK-NEXT: global_load_b128 v[60:63], v[6:7], off
+; CHECK-NEXT: v_mov_b32_e32 v4, 32
+; CHECK-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 16
+; CHECK-NEXT: v_mov_b32_e32 v7, 0
+; CHECK-NEXT: s_clause 0x1
+; CHECK-NEXT: global_load_b128 v[76:79], v[4:5], off
+; CHECK-NEXT: global_load_b128 v[88:91], v[6:7], off
+; CHECK-NEXT: v_writelane_b32 v93, s0, 14
+; CHECK-NEXT: s_addk_co_i32 s32, 0x1a0
+; CHECK-NEXT: s_getpc_b64 s[0:1]
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_sext_i32_i16 s1, s1
+; CHECK-NEXT: s_add_co_u32 s0, s0, foo@gotpcrel32@lo+12
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_add_co_ci_u32 s1, s1, foo@gotpcrel32@hi+24
+; CHECK-NEXT: s_wait_loadcnt 0x7
+; CHECK-NEXT: scratch_store_b128 off, v[10:13], s33 offset:148 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x6
+; CHECK-NEXT: scratch_store_b128 off, v[14:17], s33 offset:164 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x5
+; CHECK-NEXT: scratch_store_b128 off, v[18:21], s33 offset:180 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x4
+; CHECK-NEXT: s_clause 0x4 ; 80-byte Folded Spill
+; CHECK-NEXT: scratch_store_b128 off, v[22:25], s33 offset:196
+; CHECK-NEXT: scratch_store_b128 off, v[26:29], s33 offset:212
+; CHECK-NEXT: scratch_store_b128 off, v[30:33], s33 offset:228
+; CHECK-NEXT: scratch_store_b128 off, v[34:37], s33 offset:244
+; CHECK-NEXT: scratch_store_b128 off, v[38:41], s33 offset:260
+; CHECK-NEXT: s_clause 0x4
+; CHECK-NEXT: global_load_b128 v[10:13], v[8:9], off offset:48
+; CHECK-NEXT: global_load_b128 v[14:17], v[8:9], off offset:64
+; CHECK-NEXT: global_load_b128 v[18:21], v[8:9], off offset:80
+; CHECK-NEXT: global_load_b128 v[22:25], v[8:9], off offset:96
+; CHECK-NEXT: global_load_b128 v[26:29], v[8:9], off offset:112
+; CHECK-NEXT: v_writelane_b32 v93, s30, 0
+; CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; CHECK-NEXT: s_wait_loadcnt 0x4
+; CHECK-NEXT: scratch_store_b128 off, v[10:13], s33 offset:276 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x3
+; CHECK-NEXT: scratch_store_b128 off, v[14:17], s33 offset:292 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x2
+; CHECK-NEXT: scratch_store_b128 off, v[18:21], s33 offset:308 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x1
+; CHECK-NEXT: scratch_store_b128 off, v[22:25], s33 offset:324 ; 16-byte Folded Spill
+; CHECK-NEXT: s_wait_loadcnt 0x0
+; CHECK-NEXT: s_clause 0x3 ; 64-byte Folded Spill
+; CHECK-NEXT: scratch_store_b128 off, v[26:29], s33 offset:340
+; CHECK-NEXT: scratch_store_b128 off, v[30:33], s33 offset:356
+; CHECK-NEXT: scratch_store_b128 off, v[34:37], s33 offset:372
+; CHECK-NEXT: scratch_store_b128 off, v[38:41], s33 offset:388
+; CHECK-NEXT: v_writelane_b32 v93, s31, 1
+; CHECK-NEXT: v_writelane_b32 v93, s34, 2
+; CHECK-NEXT: v_writelane_b32 v93, s35, 3
+; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
+; CHECK-NEXT: v_writelane_b32 v93, s36, 4
+; CHECK-NEXT: v_writelane_b32 v93, s37, 5
+; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
+; CHECK-NEXT: v_writelane_b32 v93, s38, 6
+; CHECK-NEXT: v_writelane_b32 v93, s39, 7
+; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
+; CHECK-NEXT: v_writelane_b32 v93, s48, 8
+; CHECK-NEXT: v_writelane_b32 v93, s49, 9
+; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5]
+; CHECK-NEXT: v_writelane_b32 v93, s50, 10
+; CHECK-NEXT: s_mov_b32 s50, s15
+; CHECK-NEXT: v_writelane_b32 v93, s51, 11
+; CHECK-NEXT: s_mov_b32 s51, s14
+; CHECK-NEXT: v_writelane_b32 v93, s52, 12
+; CHECK-NEXT: s_mov_b32 s52, s13
+; CHECK-NEXT: v_writelane_b32 v93, s53, 13
+; CHECK-NEXT: s_mov_b32 s53, s12
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT: s_clause 0x7 ; 128-byte Folded Reload
+; CHECK-NEXT: scratch_load_b128 v[0:3], off, s33 offset:276 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[4:7], off, s33 offset:292 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[8:11], off, s33 offset:308 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[12:15], off, s33 offset:324 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[16:19], off, s33 offset:340 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[20:23], off, s33 offset:356 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[24:27], off, s33 offset:372 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[28:31], off, s33 offset:388 th:TH_LOAD_LU
+; CHECK-NEXT: s_getpc_b64 s[0:1]
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_sext_i32_i16 s1, s1
+; CHECK-NEXT: s_add_co_u32 s0, s0, bar@gotpcrel32@lo+12
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_add_co_ci_u32 s1, s1, bar@gotpcrel32@hi+24
+; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49]
+; CHECK-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
+; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
+; CHECK-NEXT: s_mov_b32 s12, s53
+; CHECK-NEXT: s_mov_b32 s13, s52
+; CHECK-NEXT: s_mov_b32 s14, s51
+; CHECK-NEXT: s_mov_b32 s15, s50
+; CHECK-NEXT: s_wait_loadcnt 0x3
+; CHECK-NEXT: scratch_store_b32 off, v19, s32 offset:128
+; CHECK-NEXT: s_wait_loadcnt 0x2
+; CHECK-NEXT: scratch_load_b128 v[19:22], off, s33 offset:148 th:TH_LOAD_LU ; 16-byte Folded Reload
+; CHECK-NEXT: s_wait_loadcnt 0x2
+; CHECK-NEXT: scratch_load_b128 v[23:26], off, s33 offset:164 th:TH_LOAD_LU ; 16-byte Folded Reload
+; CHECK-NEXT: s_wait_loadcnt 0x2
+; CHECK-NEXT: s_clause 0x5 ; 96-byte Folded Reload
+; CHECK-NEXT: scratch_load_b128 v[27:30], off, s33 offset:180 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[31:34], off, s33 offset:196 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[35:38], off, s33 offset:212 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[39:42], off, s33 offset:228 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[43:46], off, s33 offset:244 th:TH_LOAD_LU
+; CHECK-NEXT: scratch_load_b128 v[47:50], off, s33 offset:260 th:TH_LOAD_LU
+; CHECK-NEXT: s_clause 0x3
+; CHECK-NEXT: scratch_store_b128 off, v[15:18], s32 offset:112
+; CHECK-NEXT: scratch_store_b128 off, v[11:14], s32 offset:96
+; CHECK-NEXT: scratch_store_b128 off, v[7:10], s32 offset:80
+; CHECK-NEXT: scratch_store_b128 off, v[3:6], s32 offset:64
+; CHECK-NEXT: v_dual_mov_b32 v4, v88 :: v_dual_mov_b32 v5, v89
+; CHECK-NEXT: v_dual_mov_b32 v6, v90 :: v_dual_mov_b32 v7, v91
+; CHECK-NEXT: v_dual_mov_b32 v8, v76 :: v_dual_mov_b32 v9, v77
+; CHECK-NEXT: v_dual_mov_b32 v10, v78 :: v_dual_mov_b32 v11, v79
+; CHECK-NEXT: v_dual_mov_b32 v12, v60 :: v_dual_mov_b32 v13, v61
+; CHECK-NEXT: v_dual_mov_b32 v14, v62 :: v_dual_mov_b32 v15, v63
+; CHECK-NEXT: v_dual_mov_b32 v16, v108 :: v_dual_mov_b32 v17, v109
+; CHECK-NEXT: v_mov_b32_e32 v18, v110
+; CHECK-NEXT: s_wait_loadcnt 0x1
+; CHECK-NEXT: v_dual_mov_b32 v44, v0 :: v_dual_mov_b32 v45, v1
+; CHECK-NEXT: v_mov_b32_e32 v46, v2
+; CHECK-NEXT: v_dual_mov_b32 v0, v72 :: v_dual_mov_b32 v1, v73
+; CHECK-NEXT: v_dual_mov_b32 v2, v74 :: v_dual_mov_b32 v3, v75
+; CHECK-NEXT: v_mov_b32_e32 v43, v34
+; CHECK-NEXT: v_dual_mov_b32 v42, v33 :: v_dual_mov_b32 v41, v32
+; CHECK-NEXT: v_dual_mov_b32 v40, v31 :: v_dual_mov_b32 v39, v30
+; CHECK-NEXT: v_dual_mov_b32 v38, v29 :: v_dual_mov_b32 v37, v28
+; CHECK-NEXT: v_dual_mov_b32 v36, v27 :: v_dual_mov_b32 v35, v26
+; CHECK-NEXT: v_mov_b32_e32 v34, v25
+; CHECK-NEXT: v_mov_b32_e32 v33, v24
+; CHECK-NEXT: v_mov_b32_e32 v32, v23
+; CHECK-NEXT: v_mov_b32_e32 v31, v22
+; CHECK-NEXT: v_mov_b32_e32 v30, v21
+; CHECK-NEXT: v_mov_b32_e32 v29, v20
+; CHECK-NEXT: v_mov_b32_e32 v28, v19
+; CHECK-NEXT: s_clause 0x3
+; CHECK-NEXT: scratch_store_b128 off, v[43:46], s32 offset:48
+; CHECK-NEXT: scratch_store_b128 off, v[39:42], s32 offset:32
+; CHECK-NEXT: scratch_store_b128 off, v[35:38], s32 offset:16
+; CHECK-NEXT: scratch_store_b128 off, v[31:34], s32
+; CHECK-NEXT: v_mov_b32_e32 v31, v92
+; CHECK-NEXT: v_dual_mov_b32 v19, v111 :: v_dual_mov_b32 v20, v104
+; CHECK-NEXT: v_dual_mov_b32 v21, v105 :: v_dual_mov_b32 v22, v106
+; CHECK-NEXT: v_dual_mov_b32 v23, v107 :: v_dual_mov_b32 v24, v56
+; CHECK-NEXT: v_dual_mov_b32 v25, v57 :: v_dual_mov_b32 v26, v58
+; CHECK-NEXT: v_mov_b32_e32 v27, v59
+; CHECK-NEXT: s_wait_kmcnt 0x0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT: s_clause 0x1f ; 128-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v111, off, s33
+; CHECK-NEXT: scratch_load_b32 v110, off, s33 offset:4
+; CHECK-NEXT: scratch_load_b32 v109, off, s33 offset:8
+; CHECK-NEXT: scratch_load_b32 v108, off, s33 offset:12
+; CHECK-NEXT: scratch_load_b32 v107, off, s33 offset:16
+; CHECK-NEXT: scratch_load_b32 v106, off, s33 offset:20
+; CHECK-NEXT: scratch_load_b32 v105, off, s33 offset:24
+; CHECK-NEXT: scratch_load_b32 v104, off, s33 offset:28
+; CHECK-NEXT: scratch_load_b32 v92, off, s33 offset:32
+; CHECK-NEXT: scratch_load_b32 v91, off, s33 offset:36
+; CHECK-NEXT: scratch_load_b32 v90, off, s33 offset:40
+; CHECK-NEXT: scratch_load_b32 v89, off, s33 offset:44
+; CHECK-NEXT: scratch_load_b32 v88, off, s33 offset:48
+; CHECK-NEXT: scratch_load_b32 v79, off, s33 offset:52
+; CHECK-NEXT: scratch_load_b32 v78, off, s33 offset:56
+; CHECK-NEXT: scratch_load_b32 v77, off, s33 offset:60
+; CHECK-NEXT: scratch_load_b32 v76, off, s33 offset:64
+; CHECK-NEXT: scratch_load_b32 v75, off, s33 offset:68
+; CHECK-NEXT: scratch_load_b32 v74, off, s33 offset:72
+; CHECK-NEXT: scratch_load_b32 v73, off, s33 offset:76
+; CHECK-NEXT: scratch_load_b32 v72, off, s33 offset:80
+; CHECK-NEXT: scratch_load_b32 v63, off, s33 offset:84
+; CHECK-NEXT: scratch_load_b32 v62, off, s33 offset:88
+; CHECK-NEXT: scratch_load_b32 v61, off, s33 offset:92
+; CHECK-NEXT: scratch_load_b32 v60, off, s33 offset:96
+; CHECK-NEXT: scratch_load_b32 v59, off, s33 offset:100
+; CHECK-NEXT: scratch_load_b32 v58, off, s33 offset:104
+; CHECK-NEXT: scratch_load_b32 v57, off, s33 offset:108
+; CHECK-NEXT: scratch_load_b32 v56, off, s33 offset:112
+; CHECK-NEXT: scratch_load_b32 v47, off, s33 offset:116
+; CHECK-NEXT: scratch_load_b32 v46, off, s33 offset:120
+; CHECK-NEXT: scratch_load_b32 v45, off, s33 offset:124
+; CHECK-NEXT: s_clause 0x4 ; 20-byte Folded Reload
+; CHECK-NEXT: scratch_load_b32 v44, off, s33 offset:128
+; CHECK-NEXT: scratch_load_b32 v43, off, s33 offset:132
+; CHECK-NEXT: scratch_load_b32 v42, off, s33 offset:136
+; CHECK-NEXT: scratch_load_b32 v41, off, s33 offset:140
+; CHECK-NEXT: scratch_load_b32 v40, off, s33 offset:144
+; CHECK-NEXT: v_readlane_b32 s53, v93, 13
+; CHECK-NEXT: v_readlane_b32 s52, v93, 12
+; CHECK-NEXT: v_readlane_b32 s51, v93, 11
+; CHECK-NEXT: v_readlane_b32 s50, v93, 10
+; CHECK-NEXT: v_readlane_b32 s49, v93, 9
+; CHECK-NEXT: v_readlane_b32 s48, v93, 8
+; CHECK-NEXT: v_readlane_b32 s39, v93, 7
+; CHECK-NEXT: v_readlane_b32 s38, v93, 6
+; CHECK-NEXT: v_readlane_b32 s37, v93, 5
+; CHECK-NEXT: v_readlane_b32 s36, v93, 4
+; CHECK-NEXT: v_readlane_b32 s35, v93, 3
+; CHECK-NEXT: v_readlane_b32 s34, v93, 2
+; CHECK-NEXT: v_readlane_b32 s31, v93, 1
+; CHECK-NEXT: v_readlane_b32 s30, v93, 0
+; CHECK-NEXT: s_mov_b32 s32, s33
+; CHECK-NEXT: v_readlane_b32 s0, v93, 14
+; CHECK-NEXT: s_or_saveexec_b32 s1, -1
+; CHECK-NEXT: scratch_load_b32 v93, off, s33 offset:404 ; 4-byte Folded Reload
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_mov_b32 exec_lo, s1
+; CHECK-NEXT: s_mov_b32 s33, s0
+; CHECK-NEXT: s_wait_loadcnt 0x0
+; CHECK-NEXT: s_wait_alu depctr_sa_sdst(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %A = load <32 x i64>, ptr addrspace(1) null, align 256
+ %B = call i32 @foo()
+ %C = call <8 x half> @bar(<32 x i64> %A)
+ ret <8 x half> %C
+}
>From 3cb65cf4451b5e728fb1e4968ba78b8e83d74220 Mon Sep 17 00:00:00 2001
From: agorzyns <amber.gorzynski at amd.com>
Date: Mon, 9 Mar 2026 09:31:47 -0500
Subject: [PATCH 2/2] [AMDGPU] Insert temporary abort() to demonstrate the new
test
---
llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
index 9b6bb56c85d24..f69ad76955bff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp
@@ -118,6 +118,7 @@ bool AMDGPUMarkLastScratchLoad::run(MachineFunction &MF) {
// If there is no instruction at this slot because it was deleted take the
// instruction from the next slot.
if (!MISegmentEnd) {
+ abort();
SlotIndex NextSlot = Slots.getNextNonNullIndex(Segment.end);
MISegmentEnd = SI->getInstructionFromIndex(NextSlot);
}
More information about the llvm-commits
mailing list