[llvm-branch-commits] [llvm] [AMDGPU] Prevent SI_CS_CHAIN instruction from giving register classes in generic instructions (PR #131329)
Ana Mihajlovic via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Mar 14 05:56:41 PDT 2025
https://github.com/mihajlovicana created https://github.com/llvm/llvm-project/pull/131329
SI_CS_CHAIN adds register classes to generic instructions. This causes the legalize combiner to break.
The patch fixes this issue by adding COPY instructions.
>From 3e36fbad5d782690ef845f754f3203d9d79b0602 Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 10 Mar 2025 14:19:13 +0100
Subject: [PATCH 1/3] AMDGPU/GlobalISel: add tests for CS_CHAIN
---
.../irtranslator-amdgcn-cs-chain.ll | 183 ++++++++++++++++++
1 file changed, 183 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
index 4b0ff1b2eb470..e8ee5d8625ccb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
@@ -134,4 +134,187 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p
unreachable
}
+define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %0) #10 {
+ ; GFX11-LABEL: name: retry_vgpr_alloc.v20i32
+ ; GFX11: bb.1 (%ir-block.1):
+ ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $sgpr14
+ ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $sgpr15
+ ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $sgpr16
+ ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $sgpr17
+ ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $sgpr18
+ ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $sgpr19
+ ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<20 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+ ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+ ; GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; GFX11-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296
+ ; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX11-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32
+ ; GFX11-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32)
+ ; GFX11-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32)
+ ; GFX11-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32)
+ ; GFX11-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc)
+ ; GFX11-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]]
+ ; GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[EVEC2]](s32)
+ ; GFX11-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]]
+ ; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64)
+ ; GFX11-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2
+ ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
+ ; GFX11-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV4]](s32)
+ ; GFX11-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV5]](s32)
+ ; GFX11-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV6]](s32)
+ ; GFX11-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV7]](s32)
+ ; GFX11-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV8]](s32)
+ ; GFX11-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV9]](s32)
+ ; GFX11-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV10]](s32)
+ ; GFX11-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV11]](s32)
+ ; GFX11-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV12]](s32)
+ ; GFX11-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV13]](s32)
+ ; GFX11-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV14]](s32)
+ ; GFX11-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV15]](s32)
+ ; GFX11-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT16:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV16]](s32)
+ ; GFX11-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT16]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT17:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV17]](s32)
+ ; GFX11-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT17]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT18:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV18]](s32)
+ ; GFX11-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32)
+ ; GFX11-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32)
+ ; GFX11-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19
+ ;
+ ; GFX10-LABEL: name: retry_vgpr_alloc.v20i32
+ ; GFX10: bb.1 (%ir-block.1):
+ ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr7
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr11
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $sgpr12
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $sgpr13
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $sgpr14
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $sgpr15
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $sgpr16
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $sgpr17
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $sgpr18
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $sgpr19
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<20 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 19
+ ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 18
+ ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
+ ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296
+ ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; GFX10-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32
+ ; GFX10-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32)
+ ; GFX10-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32)
+ ; GFX10-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32)
+ ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc)
+ ; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]]
+ ; GFX10-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[EVEC2]](s32)
+ ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]]
+ ; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64)
+ ; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
+ ; GFX10-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT4:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV4]](s32)
+ ; GFX10-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT4]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT5:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV5]](s32)
+ ; GFX10-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT5]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT6:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV6]](s32)
+ ; GFX10-NEXT: $sgpr6 = COPY [[INTRINSIC_CONVERGENT6]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT7:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV7]](s32)
+ ; GFX10-NEXT: $sgpr7 = COPY [[INTRINSIC_CONVERGENT7]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT8:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV8]](s32)
+ ; GFX10-NEXT: $sgpr8 = COPY [[INTRINSIC_CONVERGENT8]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT9:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV9]](s32)
+ ; GFX10-NEXT: $sgpr9 = COPY [[INTRINSIC_CONVERGENT9]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT10:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV10]](s32)
+ ; GFX10-NEXT: $sgpr10 = COPY [[INTRINSIC_CONVERGENT10]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT11:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV11]](s32)
+ ; GFX10-NEXT: $sgpr11 = COPY [[INTRINSIC_CONVERGENT11]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT12:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV12]](s32)
+ ; GFX10-NEXT: $sgpr12 = COPY [[INTRINSIC_CONVERGENT12]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT13:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV13]](s32)
+ ; GFX10-NEXT: $sgpr13 = COPY [[INTRINSIC_CONVERGENT13]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT14:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV14]](s32)
+ ; GFX10-NEXT: $sgpr14 = COPY [[INTRINSIC_CONVERGENT14]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT15:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV15]](s32)
+ ; GFX10-NEXT: $sgpr15 = COPY [[INTRINSIC_CONVERGENT15]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT16:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV16]](s32)
+ ; GFX10-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT16]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT17:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV17]](s32)
+ ; GFX10-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT17]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT18:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV18]](s32)
+ ; GFX10-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32)
+ ; GFX10-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32)
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY20]](<4 x s32>)
+ ; GFX10-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ %.i19 = extractelement <20 x i32> %0, i64 19
+ %.i18 = extractelement <20 x i32> %0, i64 18
+ %.i17 = extractelement <20 x i32> %0, i64 17
+ %2 = call i64 @llvm.amdgcn.s.getpc()
+ %3 = and i64 %2, -4294967296
+ %4 = zext i32 %.i17 to i64
+ %5 = or disjoint i64 %3, %4
+ %6 = inttoptr i64 %5 to ptr
+ call void @llvm.amdgcn.s.sleep(i32 2)
+ call void (ptr, i32, <20 x i32>, {}, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_s(ptr inreg %6, i32 inreg %.i18, <20 x i32> inreg %0, {} poison, i32 1, i32 %.i19, i32 -1, ptr nonnull @retry_vgpr_alloc.v20i32)
+ unreachable
+}
+
>From 636bae850d6d939c422690b8b5e9be4ba28f2d8a Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Mon, 10 Mar 2025 16:29:18 +0100
Subject: [PATCH 2/3] isel fix
---
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 6 +-
.../irtranslator-amdgcn-cs-chain.ll | 26 +++---
.../amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll | 76 +++++++++++++++--
...-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll | 84 ++++++++++---------
4 files changed, 134 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index a15f193549936..30c09227a634d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1278,7 +1278,11 @@ bool AMDGPUCallLowering::lowerTailCall(
if (auto CI = dyn_cast<ConstantInt>(Arg.OrigValue)) {
MIB.addImm(CI->getSExtValue());
} else {
- MIB.addReg(Arg.Regs[0]);
+ Register Reg = Arg.Regs[0];
+ if (!MRI.getVRegDef(Reg)->isCopy())
+ Reg = MIRBuilder.buildCopy(MRI.getType(Reg), Reg).getReg(0);
+
+ MIB.addReg(Reg);
unsigned Idx = MIB->getNumOperands() - 1;
MIB->getOperand(Idx).setReg(constrainOperandRegClass(
MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
index e8ee5d8625ccb..a5a0defb833cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
@@ -166,9 +166,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %
; GFX11-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296
; GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX11-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32
- ; GFX11-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32)
- ; GFX11-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32)
+ ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32
+ ; GFX11-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32)
+ ; GFX11-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32)
; GFX11-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32)
; GFX11-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc)
; GFX11-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]]
@@ -176,6 +176,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %
; GFX11-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]]
; GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64)
; GFX11-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2
+ ; GFX11-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC1]](s32)
+ ; GFX11-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC]](s32)
+ ; GFX11-NEXT: [[COPY22:%[0-9]+]]:ccr_sgpr_64(p0) = COPY [[GV]](p0)
; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>)
; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
@@ -217,7 +220,7 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %
; GFX11-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32)
; GFX11-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32)
; GFX11-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32)
- ; GFX11-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19
+ ; GFX11-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[COPY20]](s32), [[COPY21]](s32), -1, [[COPY22]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19
;
; GFX10-LABEL: name: retry_vgpr_alloc.v20i32
; GFX10: bb.1 (%ir-block.1):
@@ -250,9 +253,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %
; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -4294967296
; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; GFX10-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32
- ; GFX10-NEXT: [[EVEC:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32)
- ; GFX10-NEXT: [[EVEC1:%[0-9]+]]:sreg_32(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32)
+ ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @retry_vgpr_alloc.v20i32
+ ; GFX10-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C]](s32)
+ ; GFX10-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C1]](s32)
; GFX10-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<20 x s32>), [[C2]](s32)
; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.getpc)
; GFX10-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[INT]], [[C3]]
@@ -260,6 +263,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %
; GFX10-NEXT: [[OR:%[0-9]+]]:_(s64) = disjoint G_OR [[AND]], [[ZEXT]]
; GFX10-NEXT: [[INTTOPTR:%[0-9]+]]:ccr_sgpr_64(p0) = G_INTTOPTR [[OR]](s64)
; GFX10-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sleep), 2
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC1]](s32)
+ ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s32) = COPY [[EVEC]](s32)
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:ccr_sgpr_64(p0) = COPY [[GV]](p0)
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<20 x s32>)
; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
@@ -301,9 +307,9 @@ define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %
; GFX10-NEXT: $sgpr18 = COPY [[INTRINSIC_CONVERGENT18]](s32)
; GFX10-NEXT: [[INTRINSIC_CONVERGENT19:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV19]](s32)
; GFX10-NEXT: $sgpr19 = COPY [[INTRINSIC_CONVERGENT19]](s32)
- ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY20]](<4 x s32>)
- ; GFX10-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[EVEC1]](s32), [[EVEC]](s32), -1, [[GV]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY23]](<4 x s32>)
+ ; GFX10-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[INTTOPTR]](p0), 0, 0, [[COPY20]](s32), [[COPY21]](s32), -1, [[COPY22]](p0), amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr48_sgpr49_sgpr50_sgpr51
%.i19 = extractelement <20 x i32> %0, i64 19
%.i18 = extractelement <20 x i32> %0, i64 18
%.i17 = extractelement <20 x i32> %0, i64 17
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
index 77c9b9813571a..f766baf73f9ab 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
@@ -18,16 +18,16 @@ define amdgpu_cs_chain void @dynamic_vgprs(i32 inreg %exec, <3 x i32> inreg %sgp
; GISEL-GFX12-NEXT: s_mov_b32 s0, s1
; GISEL-GFX12-NEXT: s_mov_b32 s1, s2
; GISEL-GFX12-NEXT: s_mov_b32 s2, s3
-; GISEL-GFX12-NEXT: s_mov_b32 s6, callee at abs32@lo
-; GISEL-GFX12-NEXT: s_mov_b32 s7, callee at abs32@hi
-; GISEL-GFX12-NEXT: s_mov_b32 s8, retry_vgpr_alloc at abs32@lo
-; GISEL-GFX12-NEXT: s_mov_b32 s9, retry_vgpr_alloc at abs32@hi
+; GISEL-GFX12-NEXT: s_mov_b32 s6, retry_vgpr_alloc at abs32@lo
+; GISEL-GFX12-NEXT: s_mov_b32 s7, retry_vgpr_alloc at abs32@hi
+; GISEL-GFX12-NEXT: s_mov_b32 s8, callee at abs32@lo
+; GISEL-GFX12-NEXT: s_mov_b32 s9, callee at abs32@hi
; GISEL-GFX12-NEXT: s_alloc_vgpr s4
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
-; GISEL-GFX12-NEXT: s_cselect_b64 s[6:7], s[6:7], s[8:9]
+; GISEL-GFX12-NEXT: s_cselect_b64 s[8:9], s[8:9], s[6:7]
; GISEL-GFX12-NEXT: s_cselect_b32 exec_lo, s5, -1
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
-; GISEL-GFX12-NEXT: s_setpc_b64 s[6:7]
+; GISEL-GFX12-NEXT: s_setpc_b64 s[8:9]
;
; DAGISEL-GFX12-LABEL: dynamic_vgprs:
; DAGISEL-GFX12: ; %bb.0:
@@ -95,3 +95,67 @@ define amdgpu_cs_chain void @constants(<3 x i32> inreg %sgpr, { i32, ptr addrspa
}
declare amdgpu_cs_chain_preserve void @retry_vgpr_alloc(<3 x i32> inreg %sgpr)
+
+define amdgpu_cs_chain_preserve void @retry_vgpr_alloc.v20i32(<20 x i32> inreg %0) #10 {
+; GISEL-GFX12-LABEL: retry_vgpr_alloc.v20i32:
+; GISEL-GFX12: ; %bb.0:
+; GISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; GISEL-GFX12-NEXT: s_getpc_b64 s[20:21]
+; GISEL-GFX12-NEXT: s_mov_b32 s22, 0
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_sext_i32_i16 s21, s21
+; GISEL-GFX12-NEXT: s_mov_b32 s23, -1
+; GISEL-GFX12-NEXT: s_mov_b32 s24, s17
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_and_b64 s[20:21], s[20:21], s[22:23]
+; GISEL-GFX12-NEXT: s_mov_b32 s25, 0
+; GISEL-GFX12-NEXT: s_mov_b32 s22, retry_vgpr_alloc.v20i32 at abs32@lo
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_or_b64 s[20:21], s[20:21], s[24:25]
+; GISEL-GFX12-NEXT: s_mov_b32 s23, retry_vgpr_alloc.v20i32 at abs32@hi
+; GISEL-GFX12-NEXT: s_sleep 2
+; GISEL-GFX12-NEXT: s_alloc_vgpr s19
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_cselect_b64 s[20:21], s[20:21], s[22:23]
+; GISEL-GFX12-NEXT: s_cselect_b32 exec_lo, s18, -1
+; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; GISEL-GFX12-NEXT: s_setpc_b64 s[20:21]
+;
+; DAGISEL-GFX12-LABEL: retry_vgpr_alloc.v20i32:
+; DAGISEL-GFX12: ; %bb.0:
+; DAGISEL-GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_expcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_samplecnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL-GFX12-NEXT: s_wait_kmcnt 0x0
+; DAGISEL-GFX12-NEXT: s_getpc_b64 s[24:25]
+; DAGISEL-GFX12-NEXT: s_mov_b32 s20, s17
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_sext_i32_i16 s25, s25
+; DAGISEL-GFX12-NEXT: s_mov_b32 s23, retry_vgpr_alloc.v20i32 at abs32@hi
+; DAGISEL-GFX12-NEXT: s_mov_b32 s22, retry_vgpr_alloc.v20i32 at abs32@lo
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_mov_b32 s21, s25
+; DAGISEL-GFX12-NEXT: s_sleep 2
+; DAGISEL-GFX12-NEXT: s_alloc_vgpr s19
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_cselect_b64 s[20:21], s[20:21], s[22:23]
+; DAGISEL-GFX12-NEXT: s_cselect_b32 exec_lo, s18, -1
+; DAGISEL-GFX12-NEXT: s_wait_alu 0xfffe
+; DAGISEL-GFX12-NEXT: s_setpc_b64 s[20:21]
+ %.i19 = extractelement <20 x i32> %0, i64 19
+ %.i18 = extractelement <20 x i32> %0, i64 18
+ %.i17 = extractelement <20 x i32> %0, i64 17
+ %2 = call i64 @llvm.amdgcn.s.getpc()
+ %3 = and i64 %2, -4294967296
+ %4 = zext i32 %.i17 to i64
+ %5 = or disjoint i64 %3, %4
+ %6 = inttoptr i64 %5 to ptr
+ call void @llvm.amdgcn.s.sleep(i32 2)
+ call void (ptr, i32, <20 x i32>, {}, i32, ...) @llvm.amdgcn.cs.chain.p0.i32.v20i32.sl_s(ptr inreg %6, i32 inreg %.i18, <20 x i32> inreg %0, {} poison, i32 1, i32 %.i19, i32 -1, ptr nonnull @retry_vgpr_alloc.v20i32)
+ unreachable
+}
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
index 4e040748a34d8..e160072c1cebd 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
@@ -19,28 +19,28 @@ define amdgpu_cs_chain void @direct_callee_direct_fallback(<3 x i32> inreg %sgpr
; GISEL-GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc
+ ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc
+ ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GISEL-GFX12-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX12-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX12-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
- ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
- ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc
- ; GISEL-GFX12-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc
+ ; GISEL-GFX12-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
+ ; GISEL-GFX12-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1
; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
- ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY10]], 0, 0, 15, 64, -1, [[COPY11]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY11]], 0, 0, 15, 64, -1, [[COPY7]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; DAGISEL-GFX12-LABEL: name: direct_callee_direct_fallback
; DAGISEL-GFX12: bb.0 (%ir-block.0):
@@ -94,24 +94,24 @@ define amdgpu_cs_chain void @indirect_callee_direct_fallback(i32 inreg %exec, pt
; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11
; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc
+ ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc
+ ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY6]]
; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY7]]
; GISEL-GFX12-NEXT: $vgpr10 = COPY [[COPY8]]
; GISEL-GFX12-NEXT: $vgpr11 = COPY [[COPY9]]
- ; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @retry_vgpr_alloc
- ; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @retry_vgpr_alloc
- ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
- ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY10]], -1, [[COPY14]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY10]], -1, [[COPY11]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; DAGISEL-GFX12-LABEL: name: indirect_callee_direct_fallback
; DAGISEL-GFX12: bb.0 (%ir-block.0):
@@ -165,7 +165,7 @@ define amdgpu_cs_chain void @direct_callee_indirect_fallback(i32 inreg %exec, pt
; GISEL-GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GISEL-GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GISEL-GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
- ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GISEL-GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
; GISEL-GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GISEL-GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
@@ -174,14 +174,15 @@ define amdgpu_cs_chain void @direct_callee_indirect_fallback(i32 inreg %exec, pt
; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11
; GISEL-GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
- ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY6]]
; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY7]]
@@ -190,8 +191,8 @@ define amdgpu_cs_chain void @direct_callee_indirect_fallback(i32 inreg %exec, pt
; GISEL-GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
- ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY14]], 0, 0, [[COPY]], [[COPY10]], -1, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX12-NEXT: [[COPY15:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[COPY15]], 0, 0, [[COPY]], [[COPY10]], -1, [[COPY11]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; DAGISEL-GFX12-LABEL: name: direct_callee_indirect_fallback
; DAGISEL-GFX12: bb.0 (%ir-block.0):
@@ -243,7 +244,7 @@ define amdgpu_cs_chain void @indirect_callee_indirect_fallback(i32 inreg %exec,
; GISEL-GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
; GISEL-GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GISEL-GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
- ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GISEL-GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; GISEL-GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GISEL-GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr7
; GISEL-GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr8
@@ -252,20 +253,21 @@ define amdgpu_cs_chain void @indirect_callee_indirect_fallback(i32 inreg %exec,
; GISEL-GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr11
; GISEL-GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY $sgpr9
- ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY14:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GISEL-GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GISEL-GFX12-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
- ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec
+ ; GISEL-GFX12-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
+ ; GISEL-GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec
; GISEL-GFX12-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX12-NEXT: $vgpr8 = COPY [[COPY9]]
; GISEL-GFX12-NEXT: $vgpr9 = COPY [[COPY10]]
; GISEL-GFX12-NEXT: $vgpr10 = COPY [[COPY11]]
; GISEL-GFX12-NEXT: $vgpr11 = COPY [[COPY12]]
- ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY13]], [[COPY1]], [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX12-NEXT: SI_CS_CHAIN_TC_W32_DVGPR [[REG_SEQUENCE]], 0, 0, [[COPY]], [[COPY13]], [[COPY1]], [[COPY14]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; DAGISEL-GFX12-LABEL: name: indirect_callee_indirect_fallback
; DAGISEL-GFX12: bb.0 (%ir-block.0):
>From 07e2090d07d66fbaab138c405a8015197ec991de Mon Sep 17 00:00:00 2001
From: Ana Mihajlovic <Ana.Mihajlovic at amd.com>
Date: Fri, 14 Mar 2025 13:48:37 +0100
Subject: [PATCH 3/3] update test
---
.../AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
index f766baf73f9ab..e9ad1f39cf09b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
@@ -62,16 +62,16 @@ define amdgpu_cs_chain void @constants(<3 x i32> inreg %sgpr, { i32, ptr addrspa
; GISEL-GFX12-NEXT: s_wait_samplecnt 0x0
; GISEL-GFX12-NEXT: s_wait_bvhcnt 0x0
; GISEL-GFX12-NEXT: s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT: s_mov_b32 s4, callee@abs32@lo
-; GISEL-GFX12-NEXT: s_mov_b32 s5, callee@abs32@hi
-; GISEL-GFX12-NEXT: s_mov_b32 s6, retry_vgpr_alloc@abs32@lo
-; GISEL-GFX12-NEXT: s_mov_b32 s7, retry_vgpr_alloc@abs32@hi
+; GISEL-GFX12-NEXT: s_mov_b32 s4, retry_vgpr_alloc@abs32@lo
+; GISEL-GFX12-NEXT: s_mov_b32 s5, retry_vgpr_alloc@abs32@hi
+; GISEL-GFX12-NEXT: s_mov_b32 s6, callee@abs32@lo
+; GISEL-GFX12-NEXT: s_mov_b32 s7, callee@abs32@hi
; GISEL-GFX12-NEXT: s_alloc_vgpr 64
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
-; GISEL-GFX12-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-GFX12-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
; GISEL-GFX12-NEXT: s_cselect_b32 exec_lo, 7, -1
; GISEL-GFX12-NEXT: s_wait_alu 0xfffe
-; GISEL-GFX12-NEXT: s_setpc_b64 s[4:5]
+; GISEL-GFX12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL-GFX12-LABEL: constants:
; DAGISEL-GFX12: ; %bb.0:
More information about the llvm-branch-commits
mailing list