[llvm-branch-commits] [llvm] [AMDGPU] Fix caller/callee mismatch in SGPR assignment for inreg args (PR #182754)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Feb 22 08:51:59 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
On the callee side, `LowerFormalArguments` marks SGPR0-3 as allocated in
`CCState` before running the CC analysis. On the caller side, `LowerCall` (and
GlobalISel's `lowerCall`/`lowerTailCall`) added the scratch resource to
`RegsToPass` without marking it in `CCState`. This caused `CC_AMDGPU_Func` to
treat SGPR0-3 as available on the caller side, assigning user inreg args there,
while the callee skipped them without marking it in `CCState`. This caused
`CC_AMDGPU_Func` to treat SGPR0-3 as available on the caller side, assigning
user inreg args there, while the callee skipped them.
---
Patch is 83.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182754.diff
11 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (+12)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll (+41-41)
- (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg-bfloat.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll (+84-2)
- (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg.ll (+405-7)
- (modified) llvm/test/CodeGen/AMDGPU/cc-inreg-sgpr0-3-mismatch.ll (+168-39)
- (modified) llvm/test/CodeGen/AMDGPU/function-args-inreg.ll (-3)
- (modified) llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.convergencetokens.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.error.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/tail-call-inreg-arguments.ll (+4-8)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 5c6affdae0c5b..8e464c6168429 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1399,6 +1399,11 @@ bool AMDGPUCallLowering::lowerTailCall(
return false;
}
+ // Mark the scratch resource descriptor as allocated so the CC analysis
+ // does not assign user arguments to these registers, matching the callee.
+ if (!ST.hasFlatScratchEnabled())
+ CCInfo.AllocateReg(FuncInfo->getScratchRSrcReg());
+
OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
if (!determineAssignments(Assigner, OutArgs, CCInfo))
@@ -1598,6 +1603,13 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
+ // Mark the scratch resource descriptor as allocated so the CC analysis
+ // does not assign user arguments to these registers, matching the callee.
+ if (!ST.hasFlatScratchEnabled()) {
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ CCInfo.AllocateReg(FuncInfo->getScratchRSrcReg());
+ }
+
// Do the actual argument marshalling.
OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
if (!determineAssignments(Assigner, OutArgs, CCInfo))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8533b1bd06d90..bf88005ce8737 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4329,6 +4329,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
+ // Mark the scratch resource descriptor as allocated so the CC analysis
+ // does not assign user arguments to these registers, matching the callee.
+ if (!Subtarget->hasFlatScratchEnabled())
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
+
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index c935310584949..2c1fd5800899a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -5378,7 +5378,7 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5390,7 +5390,7 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_i16_inreg(i16 inreg %arg)
@@ -5424,7 +5424,7 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5436,7 +5436,7 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_i32_inreg(i32 inreg %arg)
@@ -5473,9 +5473,9 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5487,7 +5487,7 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr17, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_i64_inreg(i64 inreg %arg)
@@ -5524,9 +5524,9 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5538,7 +5538,7 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr17, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v2i32_inreg(<2 x i32> inreg %arg)
@@ -5574,7 +5574,7 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5586,7 +5586,7 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_f16_inreg(half inreg %arg)
@@ -5634,7 +5634,7 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5646,7 +5646,7 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_f32_inreg(float inreg %arg)
@@ -5683,9 +5683,9 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5697,7 +5697,7 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr17, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_f64_inreg(double inreg %arg)
@@ -5732,7 +5732,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -5744,7 +5744,7 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v2f16_inreg(<2 x half> inreg %arg)
@@ -5787,10 +5787,10 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
- ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
+ ; CHECK-NEXT: $sgpr16 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32)
- ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: $sgpr17 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -5802,7 +5802,7 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16_inreg, csr_amdgpu, implicit $sgpr16, implicit $sgpr17, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v3f16_inreg(<3 x half> inreg %arg)
@@ -5840,10 +5840,10 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/182754
More information about the llvm-branch-commits
mailing list