[llvm] [AMDGPU] Fix @llvm.amdgcn.cs.chain with SGPR args not provably uniform (PR #114232)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 06:53:33 PDT 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/114232
>From 68772151ee682fd222e5004336afe9b9ce07a2bc Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 30 Oct 2024 13:42:37 +0000
Subject: [PATCH 1/2] [AMDGPU] Fix @llvm.amdgcn.cs.chain with SGPR args not
provably uniform
The correct behaviour is to insert a readfirstlane. SelectionDAG was
already doing this in some cases, but not in the general case for chain
calls. GlobalISel was already doing this for return values but not for
arguments.
---
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 7 -
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 +-
.../irtranslator-amdgcn-cs-chain.ll | 36 ++-
.../GlobalISel/irtranslator-call-non-fixed.ll | 9 +-
.../AMDGPU/GlobalISel/irtranslator-call.ll | 96 ++++---
.../test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll | 73 ++++++
.../isel-amdgcn-cs-chain-intrinsic-w32.ll | 236 ++++++++++++------
.../isel-amdgcn-cs-chain-intrinsic-w64.ll | 236 ++++++++++++------
8 files changed, 496 insertions(+), 207 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 351e9f25e29cfc..ab62e530a18d0c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -230,13 +230,6 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
return AddrReg.getReg(0);
}
- void assignValueToReg(Register ValVReg, Register PhysReg,
- const CCValAssign &VA) override {
- MIB.addUse(PhysReg, RegState::Implicit);
- Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
- MIRBuilder.buildCopy(PhysReg, ExtReg);
- }
-
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 52ca38aca5c771..bc2b1bb4525fd0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3855,10 +3855,14 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned ArgIdx = 0;
for (auto [Reg, Val] : RegsToPass) {
- if (ArgIdx++ >= NumSpecialInputs && !Val->isDivergent() &&
+ if (ArgIdx++ >= NumSpecialInputs && (IsChainCallConv || !Val->isDivergent()) &&
TRI->isSGPRPhysReg(Reg)) {
- // Speculatively insert a readfirstlane in case this is a uniform value in
- // a VGPR.
+ // For chain calls, the inreg arguments are required to be
+ // uniform. Speculatively Insert a readfirstlane in case we cannot prove
+ // they are uniform.
+ //
+ // For other calls, if an inreg arguments is known to be uniform,
+ // speculatively insert a readfirstlane in case it is in a VGPR.
//
// FIXME: We need to execute this in a waterfall loop if it is a divergent
// value, so let that continue to produce invalid code.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
index 3438cbdd476d85..4b0ff1b2eb4704 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
@@ -24,9 +24,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp
; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee
; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
- ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32)
- ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32)
; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5)
; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32)
@@ -50,9 +53,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
- ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32)
- ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32)
; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5)
; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32)
@@ -82,9 +88,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p
; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve
; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
- ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32)
- ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32)
; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5)
; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32)
@@ -108,9 +117,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p
; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
- ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32)
- ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32)
; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5)
; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
index 5effd24a752088..adad38de380d7d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -50,7 +50,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
- ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
+ ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -99,8 +100,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
- ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
- ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32)
+ ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32)
+ ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index c3694158e7b971..96c3575e3190c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -942,7 +942,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
- ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32)
+ ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -3984,8 +3985,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
- ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
- ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32)
+ ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32)
+ ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -5309,7 +5312,8 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
- ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5354,7 +5358,8 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5402,8 +5407,10 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5451,8 +5458,10 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5499,7 +5508,8 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
- ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5546,7 +5556,8 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
- ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5591,7 +5602,8 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5639,8 +5651,10 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5685,7 +5699,9 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5738,8 +5754,12 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16)
; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV7]](<2 x s16>)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5787,8 +5807,12 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](<2 x s16>)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5836,8 +5860,10 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p0)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5885,8 +5911,10 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5931,7 +5959,9 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](p3)
+ ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY9]](p3)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5983,10 +6013,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: $sgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: $sgpr3 = COPY [[UV3]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32)
+ ; CHECK-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32)
+ ; CHECK-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32)
; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4)
@@ -6034,8 +6068,10 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>)
- ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
index 06f66e05d6747e..8ca3e8255b6340 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
@@ -501,6 +501,79 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
unreachable
}
+; Chain call with SGPR arguments that we cannot prove are uniform.
+define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) {
+; GISEL-GFX11-LABEL: cs_to_chain_nonuniform:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1
+; GISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2
+; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
+; GISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5
+; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee at abs32@lo
+; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee at abs32@hi
+; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-GFX10-LABEL: cs_to_chain_nonuniform:
+; GISEL-GFX10: ; %bb.0:
+; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
+; GISEL-GFX10-NEXT: s_mov_b32 s100, s0
+; GISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1
+; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
+; GISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4
+; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5
+; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee at abs32@lo
+; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee at abs32@hi
+; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
+; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s0
+; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
+; GISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
+; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
+; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform:
+; DAGISEL-GFX11: ; %bb.0:
+; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0
+; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1
+; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2
+; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
+; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5
+; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee at abs32@hi
+; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee at abs32@lo
+; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5]
+;
+; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform:
+; DAGISEL-GFX10: ; %bb.0:
+; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101]
+; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0
+; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1
+; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10
+; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4
+; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5
+; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee at abs32@hi
+; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee at abs32@lo
+; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21
+; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s0
+; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0
+; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0
+; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101]
+; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103]
+; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1
+; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5]
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
+ unreachable
+}
+
define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain:
; GISEL-GFX11: ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll
index 469d0453b9dfb1..75616d276754c7 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll
@@ -20,9 +20,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -30,8 +36,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: chain_to_chain
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -44,20 +50,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: chain_to_chain
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -136,9 +148,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -146,8 +164,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: cs_to_chain
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -160,20 +178,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: cs_to_chain
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -252,9 +276,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -262,8 +292,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: chain_to_chain_preserve
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -276,20 +306,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -368,9 +404,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -378,8 +420,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: cs_to_chain_preserve
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -392,20 +434,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -487,9 +535,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]]
@@ -510,15 +564,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]]
- ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]]
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]]
; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: indirect
@@ -613,9 +673,15 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY2]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY3]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY5]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY6]]
@@ -623,8 +689,8 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: non_imm_exec
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -638,20 +704,26 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY2]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY3]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY6]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY7]]
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY8]]
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY9]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY12]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: non_imm_exec
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -734,9 +806,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i
; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY4]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY5]]
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY6]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY7]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY8]]
@@ -758,15 +836,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i
; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY4]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY5]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY6]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY7]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY8]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY9]]
- ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
+ ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY13]]
; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, [[COPY2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll
index 51c28a02b7f821..6deac9f55f3203 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll
@@ -20,9 +20,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -30,8 +36,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: chain_to_chain
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -44,20 +50,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: chain_to_chain
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -136,9 +148,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -146,8 +164,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: cs_to_chain
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -160,20 +178,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: cs_to_chain
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -252,9 +276,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -262,8 +292,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: chain_to_chain_preserve
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -276,20 +306,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -368,9 +404,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]]
@@ -378,8 +420,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: cs_to_chain_preserve
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -392,20 +434,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad
; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]]
- ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve
; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -487,9 +535,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]]
@@ -510,15 +564,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr,
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]]
- ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]]
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]]
; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: indirect
@@ -615,9 +675,15 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr
; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]]
@@ -625,8 +691,8 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr
; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
- ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY9]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
+ ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY12]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
;
; GISEL-GFX10-LABEL: name: non_imm_exec
; GISEL-GFX10: bb.1 (%ir-block.0):
@@ -642,20 +708,26 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr
; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]]
- ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]]
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]]
; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
- ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]]
+ ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY13]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: non_imm_exec
; DAGISEL-GFX11: bb.0 (%ir-block.0):
@@ -744,9 +816,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i
; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY4]]
- ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]]
- ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY6]]
+ ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY7]]
; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY8]]
; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY9]]
@@ -770,15 +848,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i
; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9
; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10
; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11
- ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY4]]
- ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]]
- ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY6]]
+ ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
+ ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
+ ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY7]]
; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY8]]
; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY9]]
; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY10]]
- ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
- ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]]
+ ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51
+ ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY14]]
; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
;
; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec
>From f9a0c8baaec2ac7de543b1b96d8623bcad00973e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 30 Oct 2024 13:53:20 +0000
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index bc2b1bb4525fd0..059b415b75ff1b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3855,8 +3855,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned ArgIdx = 0;
for (auto [Reg, Val] : RegsToPass) {
- if (ArgIdx++ >= NumSpecialInputs && (IsChainCallConv || !Val->isDivergent()) &&
- TRI->isSGPRPhysReg(Reg)) {
+ if (ArgIdx++ >= NumSpecialInputs &&
+ (IsChainCallConv || !Val->isDivergent()) && TRI->isSGPRPhysReg(Reg)) {
// For chain calls, the inreg arguments are required to be
// uniform. Speculatively Insert a readfirstlane in case we cannot prove
// they are uniform.
More information about the llvm-commits
mailing list