[llvm] [AMDGPU][NFC] Check more autogenerated llc tests for COV5 (PR #75219)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 12 09:16:00 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Saiyedul Islam (saiislam)
<details>
<summary>Changes</summary>
Regenerate a few more llc tests so that they check for code object version 5 (COV5) instead of the default ABI version.
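Each test gets the same treatment: a module flag pinning `amdgpu_code_object_version` to 500 is appended, and the CHECK lines are refreshed (the tests are autogenerated, typically via `llvm/utils/update_llc_test_checks.py`). A minimal sketch of the pattern, using a hypothetical kernel rather than one of the tests touched here; only the module-flag lines are taken from the patch:

```llvm
; Hypothetical reduced test body for illustration.
define amdgpu_kernel void @example(ptr addrspace(1) %out) {
entry:
  store i32 0, ptr addrspace(1) %out
  ret void
}

; Pin the AMDGPU code object version to v5 so llc emits COV5 ABI code
; rather than whatever the default version is at the time.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
```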
---
Patch is 321.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75219.diff
24 Files Affected:
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll (+24-21)
- (modified) llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll (+58-55)
- (modified) llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll (+28-32)
- (modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+127-112)
- (modified) llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll (+51-22)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll (+11-7)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll (+3-2)
- (modified) llvm/test/CodeGen/AMDGPU/lower-kernargs.ll (+318-303)
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll (+30-30)
- (modified) llvm/test/CodeGen/AMDGPU/lower-module-lds-via-table.ll (+18-22)
- (modified) llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll (+51-44)
- (modified) llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll (+36-61)
- (modified) llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll (+11-8)
- (modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (+75-72)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll (+5-2)
- (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+13-23)
- (modified) llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/spill-m0.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll (+55-51)
- (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+163-170)
- (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+10-7)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/vgpr_constant_to_sgpr.ll (+27-36)
- (modified) llvm/test/CodeGen/AMDGPU/wwm-reserved.ll (+115-108)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index b03d705b43e97e..08f03c22683f92 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -11,10 +11,10 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-LABEL: name: extract_w_offset_vgpr
; GCN: bb.0.entry:
; GCN-NEXT: successors: %bb.1(0x80000000)
- ; GCN-NEXT: liveins: $vgpr0, $sgpr4_sgpr5
+ ; GCN-NEXT: liveins: $vgpr0, $sgpr2_sgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0
- ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
+ ; GCN-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
; GCN-NEXT: renamable $sgpr6 = COPY renamable $sgpr1
; GCN-NEXT: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
; GCN-NEXT: renamable $sgpr4 = S_MOV_B32 61440
@@ -56,22 +56,22 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2
; GCN-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1
; GCN-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0
- ; GCN-NEXT: undef %35.sub0:vreg_512 = COPY [[COPY1]]
- ; GCN-NEXT: %35.sub1:vreg_512 = COPY [[COPY2]]
- ; GCN-NEXT: %35.sub2:vreg_512 = COPY [[COPY3]]
- ; GCN-NEXT: %35.sub3:vreg_512 = COPY [[COPY4]]
- ; GCN-NEXT: %35.sub4:vreg_512 = COPY [[COPY5]]
- ; GCN-NEXT: %35.sub5:vreg_512 = COPY [[COPY6]]
- ; GCN-NEXT: %35.sub6:vreg_512 = COPY [[COPY7]]
- ; GCN-NEXT: %35.sub7:vreg_512 = COPY [[COPY8]]
- ; GCN-NEXT: %35.sub8:vreg_512 = COPY [[COPY9]]
- ; GCN-NEXT: %35.sub9:vreg_512 = COPY [[COPY10]]
- ; GCN-NEXT: %35.sub10:vreg_512 = COPY [[COPY11]]
- ; GCN-NEXT: %35.sub11:vreg_512 = COPY [[COPY12]]
- ; GCN-NEXT: %35.sub12:vreg_512 = COPY [[COPY13]]
- ; GCN-NEXT: %35.sub13:vreg_512 = COPY [[COPY14]]
- ; GCN-NEXT: %35.sub14:vreg_512 = COPY [[COPY15]]
- ; GCN-NEXT: %35.sub15:vreg_512 = COPY [[COPY16]]
+ ; GCN-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_512 = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub1:vreg_512 = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub2:vreg_512 = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub3:vreg_512 = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub4:vreg_512 = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub5:vreg_512 = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub6:vreg_512 = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub7:vreg_512 = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub8:vreg_512 = COPY [[COPY9]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub9:vreg_512 = COPY [[COPY10]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub10:vreg_512 = COPY [[COPY11]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub11:vreg_512 = COPY [[COPY12]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub12:vreg_512 = COPY [[COPY13]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub13:vreg_512 = COPY [[COPY14]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub14:vreg_512 = COPY [[COPY15]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]].sub15:vreg_512 = COPY [[COPY16]]
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -81,12 +81,12 @@ define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
- ; GCN-NEXT: dead [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
+ ; GCN-NEXT: dead [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
; GCN-NEXT: renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
; GCN-NEXT: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec
; GCN-NEXT: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 %35, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]]
+ ; GCN-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY17]], killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]]
; GCN-NEXT: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
; GCN-NEXT: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
; GCN-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
@@ -109,3 +109,6 @@ entry:
store i32 %value, ptr addrspace(1) %out
ret void
}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index 220ea962b9e1dc..807c19001fd99c 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -52,44 +52,44 @@ define <2 x i64> @f1() #0 {
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
; GFX11-LABEL: f2:
; GFX11: ; %bb.0: ; %bb
-; GFX11-NEXT: s_mov_b64 s[16:17], s[4:5]
+; GFX11-NEXT: s_load_b32 s21, s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v31, v0
-; GFX11-NEXT: s_load_b32 s24, s[16:17], 0x24
; GFX11-NEXT: s_mov_b32 s12, s13
-; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
+; GFX11-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX11-NEXT: s_mov_b32 s3, 0
+; GFX11-NEXT: s_mov_b32 s6, 0
+; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX11-NEXT: s_mov_b32 s0, -1
-; GFX11-NEXT: s_mov_b32 s18, exec_lo
+; GFX11-NEXT: s_mov_b32 s20, exec_lo
; GFX11-NEXT: s_mov_b32 s32, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_mul_lo_u32 v0, s24, v0
+; GFX11-NEXT: v_mul_lo_u32 v0, s21, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT: s_cbranch_execz .LBB2_13
; GFX11-NEXT: ; %bb.1: ; %bb14
-; GFX11-NEXT: s_load_b128 s[20:23], s[16:17], 0x2c
-; GFX11-NEXT: s_mov_b32 s19, 0
+; GFX11-NEXT: s_load_b128 s[16:19], s[2:3], 0x2c
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_bitcmp1_b32 s21, 0
-; GFX11-NEXT: s_cselect_b32 s25, -1, 0
-; GFX11-NEXT: s_bitcmp0_b32 s21, 0
+; GFX11-NEXT: s_bitcmp1_b32 s17, 0
+; GFX11-NEXT: s_cselect_b32 s22, -1, 0
+; GFX11-NEXT: s_bitcmp0_b32 s17, 0
+; GFX11-NEXT: s_mov_b32 s17, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB2_3
; GFX11-NEXT: ; %bb.2: ; %bb15
-; GFX11-NEXT: s_add_u32 s8, s16, 0x58
-; GFX11-NEXT: s_addc_u32 s9, s17, 0
+; GFX11-NEXT: s_add_u32 s8, s2, 0x58
+; GFX11-NEXT: s_addc_u32 s9, s3, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
; GFX11-NEXT: s_mov_b32 s13, s14
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX11-NEXT: s_mov_b32 s3, s14
+; GFX11-NEXT: s_mov_b32 s23, s14
; GFX11-NEXT: s_mov_b32 s14, s15
+; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: s_mov_b32 s14, s3
+; GFX11-NEXT: s_mov_b32 s14, s23
+; GFX11-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX11-NEXT: s_mov_b32 s1, -1
; GFX11-NEXT: s_cbranch_execz .LBB2_4
; GFX11-NEXT: s_branch .LBB2_12
@@ -98,66 +98,66 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
; GFX11-NEXT: s_cbranch_vccnz .LBB2_12
; GFX11-NEXT: .LBB2_4: ; %bb16
-; GFX11-NEXT: s_load_b32 s2, s[16:17], 0x54
-; GFX11-NEXT: s_bitcmp1_b32 s23, 0
+; GFX11-NEXT: s_load_b32 s6, s[2:3], 0x54
+; GFX11-NEXT: s_bitcmp1_b32 s19, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
-; GFX11-NEXT: s_and_b32 s3, s23, 1
+; GFX11-NEXT: s_and_b32 s7, s19, 1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_bitcmp1_b32 s2, 0
-; GFX11-NEXT: s_mov_b32 s2, -1
+; GFX11-NEXT: s_bitcmp1_b32 s6, 0
+; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_cselect_b32 s8, -1, 0
-; GFX11-NEXT: s_cmp_eq_u32 s3, 0
+; GFX11-NEXT: s_cmp_eq_u32 s7, 0
; GFX11-NEXT: s_cbranch_scc0 .LBB2_8
; GFX11-NEXT: ; %bb.5: ; %bb18.preheader
-; GFX11-NEXT: s_load_b128 s[28:31], s[16:17], 0x44
+; GFX11-NEXT: s_load_b128 s[24:27], s[2:3], 0x44
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_mul_hi_u32 s2, s29, s28
-; GFX11-NEXT: s_mul_i32 s3, s29, s28
+; GFX11-NEXT: s_mul_hi_u32 s6, s25, s24
+; GFX11-NEXT: s_mul_i32 s7, s25, s24
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_alignbit_b32 v0, s2, s3, 1
-; GFX11-NEXT: s_mov_b32 s3, 0
-; GFX11-NEXT: v_readfirstlane_b32 s2, v0
-; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25
+; GFX11-NEXT: v_alignbit_b32 v0, s6, s7, 1
+; GFX11-NEXT: s_mov_b32 s7, 0
+; GFX11-NEXT: v_readfirstlane_b32 s6, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s22
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_or_b32 s2, s2, 1
-; GFX11-NEXT: s_lshr_b32 s2, s2, s30
+; GFX11-NEXT: s_or_b32 s6, s6, 1
+; GFX11-NEXT: s_lshr_b32 s6, s6, s26
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_mul_i32 s2, s2, s22
-; GFX11-NEXT: s_mul_i32 s2, s2, s20
+; GFX11-NEXT: s_mul_i32 s6, s6, s18
+; GFX11-NEXT: s_mul_i32 s6, s6, s16
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT: s_or_b32 s2, s24, s2
-; GFX11-NEXT: s_lshl_b64 s[20:21], s[2:3], 1
-; GFX11-NEXT: global_load_u16 v1, v2, s[20:21]
+; GFX11-NEXT: s_or_b32 s6, s21, s6
+; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], 1
+; GFX11-NEXT: global_load_u16 v1, v2, s[18:19]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-NEXT: .p2align 6
; GFX11-NEXT: .LBB2_6: ; %bb18
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX11-NEXT: v_cmp_ne_u16_e64 s2, s3, 0
+; GFX11-NEXT: v_cmp_ne_u16_e64 s6, s7, 0
; GFX11-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11-NEXT: s_and_b32 vcc_lo, s8, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, v3, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
-; GFX11-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-NEXT: v_readfirstlane_b32 s6, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX11-NEXT: s_bitcmp1_b32 s2, 0
-; GFX11-NEXT: s_cselect_b32 s2, 0x100, 0
+; GFX11-NEXT: s_bitcmp1_b32 s6, 0
+; GFX11-NEXT: s_cselect_b32 s6, 0x100, 0
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_or_b32 s3, s2, s3
+; GFX11-NEXT: s_or_b32 s7, s6, s7
; GFX11-NEXT: s_cbranch_vccz .LBB2_6
; GFX11-NEXT: ; %bb.7: ; %Flow
-; GFX11-NEXT: s_mov_b32 s2, 0
+; GFX11-NEXT: s_mov_b32 s6, 0
; GFX11-NEXT: .LBB2_8: ; %Flow12
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
+; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s6
; GFX11-NEXT: s_cbranch_vccz .LBB2_12
; GFX11-NEXT: ; %bb.9:
; GFX11-NEXT: s_xor_b32 s0, s8, -1
@@ -167,17 +167,17 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
; GFX11-NEXT: s_cbranch_vccz .LBB2_10
; GFX11-NEXT: ; %bb.11: ; %Flow6
-; GFX11-NEXT: s_mov_b32 s19, -1
+; GFX11-NEXT: s_mov_b32 s17, -1
; GFX11-NEXT: .LBB2_12: ; %Flow11
-; GFX11-NEXT: s_and_b32 s3, s1, exec_lo
-; GFX11-NEXT: s_or_not1_b32 s0, s19, exec_lo
+; GFX11-NEXT: s_and_b32 s6, s1, exec_lo
+; GFX11-NEXT: s_or_not1_b32 s0, s17, exec_lo
; GFX11-NEXT: .LBB2_13: ; %Flow9
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18
-; GFX11-NEXT: s_and_saveexec_b32 s18, s0
+; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s20
+; GFX11-NEXT: s_and_saveexec_b32 s7, s0
; GFX11-NEXT: s_cbranch_execz .LBB2_15
; GFX11-NEXT: ; %bb.14: ; %bb43
-; GFX11-NEXT: s_add_u32 s8, s16, 0x58
-; GFX11-NEXT: s_addc_u32 s9, s17, 0
+; GFX11-NEXT: s_add_u32 s8, s2, 0x58
+; GFX11-NEXT: s_addc_u32 s9, s3, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
@@ -186,10 +186,10 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
; GFX11-NEXT: s_mov_b32 s14, s15
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: s_or_b32 s3, s3, exec_lo
+; GFX11-NEXT: s_or_b32 s6, s6, exec_lo
; GFX11-NEXT: .LBB2_15: ; %Flow14
-; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18
-; GFX11-NEXT: s_and_saveexec_b32 s0, s3
+; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s7
+; GFX11-NEXT: s_and_saveexec_b32 s0, s6
; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock
; GFX11-NEXT: ; divergent unreachable
; GFX11-NEXT: ; %bb.17: ; %UnifiedReturnBlock
@@ -246,3 +246,6 @@ bb43:
}
attributes #0 = { noinline optnone }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index 03c85b4470628f..6e905542ce53c1 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -9,20 +9,12 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK: ; %bb.0:
; CHECK-NEXT: s_mov_b32 s32, 0x180000
; CHECK-NEXT: s_mov_b32 s33, 0
-; CHECK-NEXT: s_add_u32 flat_scratch_lo, s12, s17
-; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
-; CHECK-NEXT: s_add_u32 s0, s0, s17
+; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15
+; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
+; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; CHECK-NEXT: v_writelane_b32 v3, s16, 0
-; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT: s_add_i32 s12, s33, 0x100200
-; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s12 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[34:35]
-; CHECK-NEXT: s_mov_b32 s13, s15
-; CHECK-NEXT: s_mov_b32 s12, s14
-; CHECK-NEXT: v_readlane_b32 s14, v3, 0
-; CHECK-NEXT: s_mov_b64 s[16:17], s[8:9]
+; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_mov_b32_e32 v2, v1
; CHECK-NEXT: v_mov_b32_e32 v1, v0
@@ -30,9 +22,9 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
-; CHECK-NEXT: s_load_dword s8, s[16:17], 0x0
+; CHECK-NEXT: s_load_dword s8, s[6:7], 0x0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_writelane_b32 v0, s8, 1
+; CHECK-NEXT: v_writelane_b32 v0, s8, 0
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s8 ; 4-byte Folded Spill
@@ -42,28 +34,29 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_add_i32 s8, s33, 0x100100
; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s8 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 s[18:19], 8
-; CHECK-NEXT: s_mov_b32 s8, s16
-; CHECK-NEXT: s_mov_b32 s9, s17
-; CHECK-NEXT: s_mov_b32 s16, s18
-; CHECK-NEXT: s_mov_b32 s15, s19
-; CHECK-NEXT: s_add_u32 s8, s8, s16
-; CHECK-NEXT: s_addc_u32 s15, s9, s15
+; CHECK-NEXT: s_mov_b64 s[16:17], 8
+; CHECK-NEXT: s_mov_b32 s8, s6
+; CHECK-NEXT: s_mov_b32 s6, s7
+; CHECK-NEXT: s_mov_b32 s9, s16
+; CHECK-NEXT: s_mov_b32 s7, s17
+; CHECK-NEXT: s_add_u32 s8, s8, s9
+; CHECK-NEXT: s_addc_u32 s6, s6, s7
; CHECK-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9
-; CHECK-NEXT: s_mov_b32 s9, s15
+; CHECK-NEXT: s_mov_b32 s9, s6
; CHECK-NEXT: v_mov_b32_e32 v0, 0x2000
-; CHECK-NEXT: ; implicit-def: $sgpr15
-; CHECK-NEXT: s_getpc_b64 s[16:17]
-; CHECK-NEXT: s_add_u32 s16, s16, device_func@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s17, s17, device_func@gotpcrel32@hi+12
-; CHECK-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
+; CHECK-NEXT: ; implicit-def: $sgpr6
+; CHECK-NEXT: s_getpc_b64 s[6:7]
+; CHECK-NEXT: s_add_u32 s6, s6, device_func@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s7, s7, device_func@gotpcrel32@hi+12
+; CHECK-NEXT: s_load_dwordx2 s[16:17], s[6:7], 0x0
; CHECK-NEXT: s_mov_b64 s[22:23], s[2:3]
; CHECK-NEXT: s_mov_b64 s[20:21], s[0:1]
-; CHECK-NEXT: s_mov_b32 s15, 20
-; CHECK-NEXT: v_lshlrev_b32_e64 v3, s15, v3
-; CHECK-NEXT: s_mov_b32 s15, 10
-; CHECK-NEXT: v_lshlrev_b32_e64 v2, s15, v2
+; CHECK-NEXT: s_mov_b32 s6, 20
+; CHECK-NEXT: v_lshlrev_b32_e64 v3, s6, v3
+; CHECK-NEXT: s_mov_b32 s6, 10
+; CHECK-NEXT: v_lshlrev_b32_e64 v2, s6, v2
; CHECK-NEXT: v_or3_b32 v31, v1, v2, v3
+; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
; CHECK-NEXT: ; implicit-def: $sgpr15
; CHECK-NEXT: s_mov_b64 s[0:1], s[20:21]
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
@@ -76,7 +69,7 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_add_i32 s4, s33, 0x100100
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(1)
-; CHECK-NEXT: v_readlane_b32 s4, v0, 1
+; CHECK-NEXT: v_readlane_b32 s4, v0, 0
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_cmp_eq_u32 s4, s5
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4000
@@ -120,3 +113,6 @@ end:
declare void @device_func(ptr addrspace(5))
attributes #0 = { nounwind "frame-pointer"="all" }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
index c22eb877bd56b6..66f31bbf7afe07 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll
@@ -113,19 +113,20 @@ define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_normal:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_add_u32 s8, s8, s11
+; CHECK-NEXT: s_add_u32 s6, s6, s9
; CHECK-NEXT: s_mov_b32 s32, 0
-; ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/75219