[llvm] [AMDGPU][GISel] Add RegBankLegalize support for G_SI_CALL (PR #165747)
Chinmay Deshpande via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 11 10:10:30 PST 2026
https://github.com/chinmaydd updated https://github.com/llvm/llvm-project/pull/165747
>From bc43a5c729c286f1da602f62e35c11212fe378ea Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Mon, 27 Oct 2025 15:48:09 -0400
Subject: [PATCH 1/8] [AMDGPU][GISel] Add RegBankLegalize support for G_SI_CALL
Change-Id: If01f1920f7ea0550e56f6a295a1bf4aa3c42a851
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 15 ++
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 +
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 4 +-
.../GlobalISel/irtranslator-assert-align.ll | 2 +-
.../irtranslator-call-abi-attribute-hints.ll | 2 +-
.../irtranslator-call-implicit-args.ll | 4 +-
.../GlobalISel/irtranslator-call-non-fixed.ll | 2 +-
.../irtranslator-call-return-values.ll | 2 +-
.../GlobalISel/irtranslator-call-sret.ll | 2 +-
.../AMDGPU/GlobalISel/irtranslator-call.ll | 2 +-
.../GlobalISel/irtranslator-indirect-call.ll | 2 +-
.../GlobalISel/irtranslator-sibling-call.ll | 2 +-
.../localizer-wrong-insert-point.mir | 2 +-
.../regbankselect-waterfall-call.mir | 215 ++++++++++++++++++
.../test/CodeGen/AMDGPU/convergence-tokens.ll | 2 +-
.../irtranslator-whole-wave-functions.ll | 2 +-
16 files changed, 254 insertions(+), 14 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index d262f074679a8..3e46dd7f4d2c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -1055,6 +1055,21 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
MI.eraseFromParent();
return true;
+ case WaterfallCall: {
+ SmallSet<Register, 4> SGPROperandRegs;
+ SGPROperandRegs.insert(MI.getOperand(1).getReg());
+
+ MachineBasicBlock::iterator Start(&MI);
+ while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
+ --Start;
+ MachineBasicBlock::iterator End(&MI);
+ while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
+ ++End;
+ ++End;
+ B.setInsertPt(B.getMBB(), Start);
+
+ executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
+ break;
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 984ac6a10e84e..1f877cd9db14c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -213,6 +213,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
}
case _:
return true;
+ case PhysReg:
+ return true;
default:
llvm_unreachable("missing matchUniformityAndLLT");
}
@@ -1146,6 +1148,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
+ addRulesForGOpcs({G_SI_CALL})
+ .Any({{PhysReg, UniP0}, {{None}, {SgprP0}}})
+ .Any({{PhysReg, DivP0}, {{None}, {VgprP0}, WaterfallCall}})
+ .Any({{PhysReg, UniP4}, {{None}, {SgprP4}}})
+ .Any({{PhysReg, DivP4}, {{None}, {VgprP4}, WaterfallCall}});
+
bool hasSALUFloat = ST->hasSALUFloatInsts();
addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index eee4f6276b925..eec273e0b2302 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -38,6 +38,7 @@ bool isAnyPtr(LLT Ty, unsigned Width);
// be checked.
enum UniformityLLTOpPredicateID {
_,
+ PhysReg,
// scalars
S1,
S16,
@@ -261,7 +262,8 @@ enum LoweringMethodID {
UnpackAExt,
VerifyAllSgpr,
ApplyAllVgpr,
- UnmergeToShiftTrunc
+ UnmergeToShiftTrunc,
+ WaterfallCall
};
enum FastRulesTypes {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index 4098f643831f1..ca9d7854fb619 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -o - %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -new-reg-bank-select -o - %s | FileCheck %s
; TODO: Could potentially insert it here
define void @arg_align_8(ptr addrspace(1) align 8 %arg0) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 1bf2a589cb597..02ce52b7450f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; Test that we don't insert code to pass implicit arguments we know
; the callee does not need.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index d69515591ecee..2bb059a0fbb1e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s
; Workitem IDs are passed to the kernel differently for gfx908
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
index 6bfd0f060aa20..6694ad741cf48 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
; amdgpu_gfx calling convention
declare hidden amdgpu_gfx void @external_gfx_void_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 6573088a41fc2..bb6d1f8d161cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
declare i1 @external_i1_func_void() #0
declare zeroext i1 @external_i1_zeroext_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 070d35a99d62a..0dc9068c1984b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator < %s | FileCheck -check-prefix=GCN %s
declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index c935310584949..efec8f4309013 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -global-isel-abort=2 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -global-isel-abort=2 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
declare hidden void @external_void_func_void() #0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index af9bcc40dc55e..5dc6932837d79 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -enable-var-scope %s
define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 7b2e3bf13c368..24e85aa280bf6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; This is a copy of sibling-call.ll, but stops after the IRTranslator.
define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir
index cbfa1c7c741b8..ed0330957ae9c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s
+# RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s
# Previously this was placing the new G_CONSTANT after the use call
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
new file mode 100644
index 0000000000000..5207d992ea74d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
@@ -0,0 +1,215 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize -o - %s | FileCheck %s
+
+---
+name: waterfall_divergent_call_p0_no_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p0_no_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
+ ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p0) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
+---
+name: waterfall_divergent_call_p4_no_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p4_no_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
+ ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p4) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
+---
+name: waterfall_divergent_call_p0_with_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p0_with_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p0)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p0) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p0), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
+ ; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p0) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
+---
+name: waterfall_divergent_call_p4_with_args
+legalized: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: waterfall_divergent_call_p4_with_args
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES %func_ptr(p4)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p4) = G_MERGE_VALUES [[INTRINSIC_CONVERGENT]](s32), [[INTRINSIC_CONVERGENT1]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[MV]](p4), %func_ptr
+ ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.ballot), [[ICMP]](s1)
+ ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INTRINSIC_CONVERGENT2]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .2:
+ ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
+ ; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
+ ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
+ ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .3:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: .4:
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ADJCALLSTACKUP 0, 0, implicit-def $scc
+ %g_ptr:_(p4) = COPY $sgpr0_sgpr1
+ %func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
+ $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ S_SETPC_B64_return undef $sgpr2_sgpr3
+
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
index 61d102d2222bd..6fe538c01c2c6 100644
--- a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
+++ b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
@@ -1,6 +1,6 @@
; RUN: llc -stop-after=amdgpu-isel -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck --check-prefixes=CHECK,ISEL %s
; RUN: llc -stop-after=dead-mi-elimination -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck --check-prefixes=CHECK,DEADMI %s
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -global-isel -new-reg-bank-select -stop-after=irtranslator -mtriple=amdgcn-- -mcpu=gfx900 -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL
; CHECK-LABEL: name: basic_call
; CHECK: [[TOKEN:%[0-9]+]]{{[^ ]*}} = CONVERGENCECTRL_ENTRY
diff --git a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
index 17c8010bcbe05..38610d8b4c410 100644
--- a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -stop-after=irtranslator < %s | FileCheck %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn--amdpal -mcpu=gfx1200 -stop-after=irtranslator < %s | FileCheck %s
define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
; CHECK-LABEL: name: basic_test
>From c80e84b38eefb6e73eb28a1d32027d78d882d630 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Mon, 9 Feb 2026 13:39:54 -0500
Subject: [PATCH 2/8] [AMDGPU][GISel] Apply src mappings to divergent pointers
instead
Change-Id: I0df1a64f055bc5f491b6b2e2c10b0b409dd6f084
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 42 ++++++++++---------
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 5 ++-
3 files changed, 28 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 3e46dd7f4d2c5..bea197a80e8e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -1055,27 +1055,25 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
MI.eraseFromParent();
return true;
- case WaterfallCall: {
- SmallSet<Register, 4> SGPROperandRegs;
- SGPROperandRegs.insert(MI.getOperand(1).getReg());
-
- MachineBasicBlock::iterator Start(&MI);
- while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
- --Start;
- MachineBasicBlock::iterator End(&MI);
- while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
- ++End;
- ++End;
- B.setInsertPt(B.getMBB(), Start);
-
- executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs);
- break;
}
}
if (!WaterfallSgprs.empty()) {
- MachineBasicBlock::iterator I = MI.getIterator();
- if (!executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs))
+ MachineBasicBlock::iterator Start = MI.getIterator();
+ MachineBasicBlock::iterator End = std::next(Start);
+
+ // For calls, the waterfall must wrap the entire call sequence.
+ if (MI.getOpcode() == AMDGPU::G_SI_CALL) {
+ while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
+ --Start;
+ End = std::next(MI.getIterator());
+ while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
+ ++End;
+ ++End;
+ B.setInsertPt(B.getMBB(), Start);
+ }
+
+ if (!executeInWaterfallLoop(B, make_range(Start, End), WaterfallSgprs))
return false;
}
return true;
@@ -1111,6 +1109,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case Vgpr128:
return LLT::scalar(128);
case SgprP0:
+ case SgprP0_WF:
case VgprP0:
return LLT::pointer(0, 64);
case SgprP1:
@@ -1123,6 +1122,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case VgprP3:
return LLT::pointer(3, 32);
case SgprP4:
+ case SgprP4_WF:
case VgprP4:
return LLT::pointer(4, 64);
case SgprP5:
@@ -1243,10 +1243,12 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case Sgpr64:
case Sgpr128:
case SgprP0:
+ case SgprP0_WF:
case SgprP1:
case SgprP2:
case SgprP3:
case SgprP4:
+ case SgprP4_WF:
case SgprP5:
case SgprP8:
case SgprPtr32:
@@ -1571,9 +1573,11 @@ bool RegBankLegalizeHelper::applyMappingSrc(
}
break;
}
- // sgpr waterfall, scalars and vectors
+ // sgpr waterfall, scalars, vectors and pointers
case Sgpr32_WF:
- case SgprV4S32_WF: {
+ case SgprV4S32_WF:
+ case SgprP0_WF:
+ case SgprP4_WF: {
assert(Ty == getTyFromID(MethodIDs[i]));
if (RB != SgprRB)
SgprWaterfallOperandRegs.insert(Reg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 1f877cd9db14c..128d294e8b8b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1150,9 +1150,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_SI_CALL})
.Any({{PhysReg, UniP0}, {{None}, {SgprP0}}})
- .Any({{PhysReg, DivP0}, {{None}, {VgprP0}, WaterfallCall}})
+ .Any({{PhysReg, DivP0}, {{None}, {SgprP0_WF}}})
.Any({{PhysReg, UniP4}, {{None}, {SgprP4}}})
- .Any({{PhysReg, DivP4}, {{None}, {VgprP4}, WaterfallCall}});
+ .Any({{PhysReg, DivP4}, {{None}, {SgprP4_WF}}});
bool hasSALUFloat = ST->hasSALUFloatInsts();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index eec273e0b2302..20c1c4038d76c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -221,6 +221,8 @@ enum RegBankLLTMappingApplyID {
// Src only modifiers: execute in waterfall loop if divergent
Sgpr32_WF,
SgprV4S32_WF,
+ SgprP0_WF,
+ SgprP4_WF,
// Src only modifiers: extends
Sgpr32AExt,
@@ -262,8 +264,7 @@ enum LoweringMethodID {
UnpackAExt,
VerifyAllSgpr,
ApplyAllVgpr,
- UnmergeToShiftTrunc,
- WaterfallCall
+ UnmergeToShiftTrunc
};
enum FastRulesTypes {
>From c7dab3c0c211359416d450325fc13870285c3def Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Mon, 9 Feb 2026 14:03:47 -0500
Subject: [PATCH 3/8] [AMDGPU][GISel] Remove PhysReg constraint
Change-Id: I7447b736b3ca6607ce7424718ec1e51cf329e08d
---
.../Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 16 ++++++++--------
.../Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 1 -
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 128d294e8b8b4..c1c91d4b46363 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -213,8 +213,6 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
}
case _:
return true;
- case PhysReg:
- return true;
default:
llvm_unreachable("missing matchUniformityAndLLT");
}
@@ -226,12 +224,14 @@ bool PredicateMapping::match(const MachineInstr &MI,
// Check LLT signature.
for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
if (OpUniformityAndTypes[i] == _) {
- if (MI.getOperand(i).isReg())
+ // `_` accepts only non-register operands and physical registers (no
+ // register bank needed); a virtual register here fails the match.
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).getReg().isVirtual())
return false;
continue;
}
- // Remaining IDs check registers.
+ // Remaining IDs check virtual registers.
if (!MI.getOperand(i).isReg())
return false;
@@ -1149,10 +1149,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
addRulesForGOpcs({G_SI_CALL})
- .Any({{PhysReg, UniP0}, {{None}, {SgprP0}}})
- .Any({{PhysReg, DivP0}, {{None}, {SgprP0_WF}}})
- .Any({{PhysReg, UniP4}, {{None}, {SgprP4}}})
- .Any({{PhysReg, DivP4}, {{None}, {SgprP4_WF}}});
+ .Any({{_, UniP0}, {{None}, {SgprP0}}})
+ .Any({{_, DivP0}, {{None}, {SgprP0_WF}}})
+ .Any({{_, UniP4}, {{None}, {SgprP4}}})
+ .Any({{_, DivP4}, {{None}, {SgprP4_WF}}});
bool hasSALUFloat = ST->hasSALUFloatInsts();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 20c1c4038d76c..6e8cf571f3b24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -38,7 +38,6 @@ bool isAnyPtr(LLT Ty, unsigned Width);
// be checked.
enum UniformityLLTOpPredicateID {
_,
- PhysReg,
// scalars
S1,
S16,
>From d721adb7eb8d99063ff4303c7e473e536405b6cc Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Mon, 9 Feb 2026 15:21:09 -0500
Subject: [PATCH 4/8] trigger build
Change-Id: I8705adc04e97bb81e3492fafba835b7e203feef7
>From fbe6660cf0cc5190ec14ad656ff284baaea147d0 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Tue, 10 Feb 2026 14:03:42 -0500
Subject: [PATCH 5/8] [AMDGPU][GISel] Move G_SI_CALL logic to
executeInWaterfallLoop
Change-Id: If15b9f1996bda3c43509ccdb919d2538229a852b
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 35 +++++++++++--------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index bea197a80e8e3..c11c44840bc83 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -80,6 +80,23 @@ bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
bool RegBankLegalizeHelper::executeInWaterfallLoop(
MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
SmallSet<Register, 4> &SGPROperandRegs) {
+
+ // For calls, the waterfall must wrap the entire call sequence.
+ for (auto I = Range.begin(), E = Range.end(); I != E; ++I) {
+ if (I->getOpcode() == AMDGPU::G_SI_CALL) {
+ auto Start = Range.begin();
+ while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
+ --Start;
+ auto End = std::next(I);
+ while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
+ ++End;
+ ++End;
+ B.setInsertPt(B.getMBB(), Start);
+ Range = make_range(Start, End);
+ break;
+ }
+ }
+
// Track use registers which have already been expanded with a readfirstlane
// sequence. This may have multiple uses if moving a sequence.
DenseMap<Register, Register> WaterfalledRegMap;
@@ -1059,21 +1076,9 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
}
if (!WaterfallSgprs.empty()) {
- MachineBasicBlock::iterator Start = MI.getIterator();
- MachineBasicBlock::iterator End = std::next(Start);
-
- // For calls, the waterfall must wrap the entire call sequence.
- if (MI.getOpcode() == AMDGPU::G_SI_CALL) {
- while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
- --Start;
- End = std::next(MI.getIterator());
- while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
- ++End;
- ++End;
- B.setInsertPt(B.getMBB(), Start);
- }
-
- if (!executeInWaterfallLoop(B, make_range(Start, End), WaterfallSgprs))
+ MachineBasicBlock::iterator I = MI.getIterator();
+ if (!executeInWaterfallLoop(B, make_range(I, std::next(I)),
+ WaterfallSgprs))
return false;
}
return true;
>From c9738deb1280bf024c53194f6d4119f9af486be0 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Tue, 10 Feb 2026 14:23:37 -0500
Subject: [PATCH 6/8] [AMDGPU] Fix formatting
Change-Id: Ifdf930bde0c5830d919d0fba2ae0c121bc816515
---
llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index c11c44840bc83..316cbf5548533 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -1077,8 +1077,7 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
if (!WaterfallSgprs.empty()) {
MachineBasicBlock::iterator I = MI.getIterator();
- if (!executeInWaterfallLoop(B, make_range(I, std::next(I)),
- WaterfallSgprs))
+ if (!executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs))
return false;
}
return true;
>From 995a4b7f67fc0d4e18b86b60973aa55b4657c09d Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Wed, 11 Feb 2026 12:26:28 -0500
Subject: [PATCH 7/8] [AMDGPU][GISel] Move waterfall logic
Change-Id: I7c782f9a279283e9d28ea8a6b0cfc65c3bfc719c
---
.../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 75 +++++++++++--------
.../AMDGPU/AMDGPURegBankLegalizeHelper.h | 9 ++-
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.h | 6 +-
4 files changed, 56 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 316cbf5548533..29fb25297cb23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -59,6 +59,7 @@ bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
}
SmallSet<Register, 4> WaterfallSgprs;
+ std::optional<iterator_range<MachineBasicBlock::iterator>> WaterfallRange;
unsigned OpIdx = 0;
if (Mapping->DstOpMapping.size() > 0) {
B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
@@ -67,11 +68,12 @@ bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
}
if (Mapping->SrcOpMapping.size() > 0) {
B.setInstr(MI);
- if (!applyMappingSrc(MI, OpIdx, Mapping->SrcOpMapping, WaterfallSgprs))
+ if (!applyMappingSrc(MI, OpIdx, Mapping->SrcOpMapping, WaterfallSgprs,
+ WaterfallRange))
return false;
}
- if (!lower(MI, *Mapping, WaterfallSgprs))
+ if (!lower(MI, *Mapping, WaterfallSgprs, WaterfallRange))
return false;
return true;
@@ -81,22 +83,6 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(
MachineIRBuilder &B, iterator_range<MachineBasicBlock::iterator> Range,
SmallSet<Register, 4> &SGPROperandRegs) {
- // For calls, the waterfall must wrap the entire call sequence.
- for (auto I = Range.begin(), E = Range.end(); I != E; ++I) {
- if (I->getOpcode() == AMDGPU::G_SI_CALL) {
- auto Start = Range.begin();
- while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
- --Start;
- auto End = std::next(I);
- while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
- ++End;
- ++End;
- B.setInsertPt(B.getMBB(), Start);
- Range = make_range(Start, End);
- break;
- }
- }
-
// Track use registers which have already been expanded with a readfirstlane
// sequence. This may have multiple uses if moving a sequence.
DenseMap<Register, Register> WaterfalledRegMap;
@@ -844,9 +830,10 @@ bool RegBankLegalizeHelper::lowerSplitTo32SExtInReg(MachineInstr &MI) {
return true;
}
-bool RegBankLegalizeHelper::lower(MachineInstr &MI,
- const RegBankLLTMapping &Mapping,
- SmallSet<Register, 4> &WaterfallSgprs) {
+bool RegBankLegalizeHelper::lower(
+ MachineInstr &MI, const RegBankLLTMapping &Mapping,
+ SmallSet<Register, 4> &WaterfallSgprs,
+ std::optional<iterator_range<MachineBasicBlock::iterator>> WaterfallRange) {
switch (Mapping.LoweringMethod) {
case DoNotLower:
@@ -1076,8 +1063,10 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI,
}
if (!WaterfallSgprs.empty()) {
- MachineBasicBlock::iterator I = MI.getIterator();
- if (!executeInWaterfallLoop(B, make_range(I, std::next(I)), WaterfallSgprs))
+ auto Range = WaterfallRange ? *WaterfallRange
+ : make_range(MI.getIterator(),
+ std::next(MI.getIterator()));
+ if (!executeInWaterfallLoop(B, Range, WaterfallSgprs))
return false;
}
return true;
@@ -1113,7 +1102,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case Vgpr128:
return LLT::scalar(128);
case SgprP0:
- case SgprP0_WF:
+ case SgprP0Call_WF:
case VgprP0:
return LLT::pointer(0, 64);
case SgprP1:
@@ -1126,7 +1115,7 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
case VgprP3:
return LLT::pointer(3, 32);
case SgprP4:
- case SgprP4_WF:
+ case SgprP4Call_WF:
case VgprP4:
return LLT::pointer(4, 64);
case SgprP5:
@@ -1247,12 +1236,12 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
case Sgpr64:
case Sgpr128:
case SgprP0:
- case SgprP0_WF:
+ case SgprP0Call_WF:
case SgprP1:
case SgprP2:
case SgprP3:
case SgprP4:
- case SgprP4_WF:
+ case SgprP4Call_WF:
case SgprP5:
case SgprP8:
case SgprPtr32:
@@ -1480,7 +1469,9 @@ bool RegBankLegalizeHelper::applyMappingDst(
bool RegBankLegalizeHelper::applyMappingSrc(
MachineInstr &MI, unsigned &OpIdx,
const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
- SmallSet<Register, 4> &SgprWaterfallOperandRegs) {
+ SmallSet<Register, 4> &SgprWaterfallOperandRegs,
+ std::optional<iterator_range<MachineBasicBlock::iterator>>
+ &WaterfallRange) {
for (unsigned i = 0; i < MethodIDs.size(); ++OpIdx, ++i) {
if (MethodIDs[i] == None || MethodIDs[i] == IntrId || MethodIDs[i] == Imm)
continue;
@@ -1577,16 +1568,36 @@ bool RegBankLegalizeHelper::applyMappingSrc(
}
break;
}
- // sgpr waterfall, scalars, vectors and pointers
+ // sgpr waterfall, scalars, and vectors
case Sgpr32_WF:
- case SgprV4S32_WF:
- case SgprP0_WF:
- case SgprP4_WF: {
+ case SgprV4S32_WF: {
assert(Ty == getTyFromID(MethodIDs[i]));
if (RB != SgprRB)
SgprWaterfallOperandRegs.insert(Reg);
break;
}
+ case SgprP0Call_WF:
+ case SgprP4Call_WF: {
+ assert(Ty == getTyFromID(MethodIDs[i]));
+ if (RB != SgprRB) {
+ SgprWaterfallOperandRegs.insert(Reg);
+
+ // Find the ADJCALLSTACKUP before the call.
+ MachineBasicBlock::iterator Start = MI.getIterator();
+ while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
+ --Start;
+
+ // Find the ADJCALLSTACKDOWN after the call (include it in range).
+ MachineBasicBlock::iterator End = MI.getIterator();
+ while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
+ ++End;
+ ++End;
+
+ B.setInsertPt(*MI.getParent(), Start);
+ WaterfallRange = make_range(Start, End);
+ }
+ break;
+ }
// sgpr and vgpr scalars with extend
case Sgpr32AExt: {
// Note: this ext allows S1, and it is meant to be combined away.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index 86669ae6ff6c7..55dbd85297007 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include <optional>
namespace llvm {
@@ -107,7 +108,9 @@ class RegBankLegalizeHelper {
bool
applyMappingSrc(MachineInstr &MI, unsigned &OpIdx,
const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs,
- SmallSet<Register, 4> &SgprWaterfallOperandRegs);
+ SmallSet<Register, 4> &SgprWaterfallOperandRegs,
+ std::optional<iterator_range<MachineBasicBlock::iterator>>
+ &WaterfallRange);
bool splitLoad(MachineInstr &MI, ArrayRef<LLT> LLTBreakdown,
LLT MergeTy = LLT());
@@ -115,7 +118,9 @@ class RegBankLegalizeHelper {
bool widenMMOToS32(GAnyLoad &MI) const;
bool lower(MachineInstr &MI, const RegBankLLTMapping &Mapping,
- SmallSet<Register, 4> &SgprWaterfallOperandRegs);
+ SmallSet<Register, 4> &SgprWaterfallOperandRegs,
+ std::optional<iterator_range<MachineBasicBlock::iterator>>
+ WaterfallRange);
bool lowerVccExtToSel(MachineInstr &MI);
std::pair<Register, Register> unpackZExt(Register Reg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index c1c91d4b46363..5717a55a92dad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1150,9 +1150,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_SI_CALL})
.Any({{_, UniP0}, {{None}, {SgprP0}}})
- .Any({{_, DivP0}, {{None}, {SgprP0_WF}}})
+ .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
.Any({{_, UniP4}, {{None}, {SgprP4}}})
- .Any({{_, DivP4}, {{None}, {SgprP4_WF}}});
+ .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
bool hasSALUFloat = ST->hasSALUFloatInsts();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 6e8cf571f3b24..b2a89f9bb2c45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -220,8 +220,10 @@ enum RegBankLLTMappingApplyID {
// Src only modifiers: execute in waterfall loop if divergent
Sgpr32_WF,
SgprV4S32_WF,
- SgprP0_WF,
- SgprP4_WF,
+
+ // Src only modifiers: execute in waterfall loop for calls
+ SgprP0Call_WF,
+ SgprP4Call_WF,
// Src only modifiers: extends
Sgpr32AExt,
>From 195dd5263bcf6da642a6fa9797e369d9f53033ed Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <chdeshpa at amd.com>
Date: Wed, 11 Feb 2026 13:10:07 -0500
Subject: [PATCH 8/8] [AMDGPU][GISel] G_SI_CALL now uses a virtual SGPR for the
return address and then generates a copy
Change-Id: I0fa1ff42b26031b26ea828e5f9958bc12e4718f6
---
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 6 +-
.../AMDGPU/AMDGPUInstructionSelector.cpp | 16 +-
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 8 +-
.../lib/Target/AMDGPU/AMDGPURegBankSelect.cpp | 9 +-
llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +-
.../GlobalISel/dereferenceable-declaration.ll | 30 +-
.../GlobalISel/irtranslator-assert-align.ll | 9 +-
.../irtranslator-call-abi-attribute-hints.ll | 18 +-
.../irtranslator-call-implicit-args.ll | 60 ++--
.../GlobalISel/irtranslator-call-non-fixed.ll | 15 +-
.../irtranslator-call-return-values.ll | 150 ++++++----
.../GlobalISel/irtranslator-call-sret.ll | 3 +-
.../AMDGPU/GlobalISel/irtranslator-call.ll | 276 ++++++++++++------
.../GlobalISel/irtranslator-indirect-call.ll | 6 +-
.../GlobalISel/irtranslator-sibling-call.ll | 12 +-
.../regbankselect-waterfall-call.mir | 40 ++-
.../irtranslator-whole-wave-functions.ll | 3 +-
17 files changed, 445 insertions(+), 218 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 5c6affdae0c5b..339111115e5aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1571,7 +1571,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
Info.CallConv);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
- MIB.addDef(TRI->getReturnAddressReg(MF));
+ Register ReturnAddrVReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIB.addDef(ReturnAddrVReg);
if (!Info.IsConvergent)
MIB.setMIFlag(MachineInstr::NoConvergent);
@@ -1634,6 +1635,9 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Now we can add the actual call instruction to the correct position.
MIRBuilder.insertInstr(MIB);
+ // Copy the return address from the virtual register to the physical register.
+ MIRBuilder.buildCopy(Register(TRI->getReturnAddressReg(MF)), ReturnAddrVReg);
+
// Finally we can copy the returned value back into its virtual-register. In
// symmetry with the arguments, the physical register must be an
// implicit-define of the call instruction.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b96c2ef70dd83..c1d335d637adf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4408,9 +4408,23 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
return selectG_SBFX_UBFX(I);
- case AMDGPU::G_SI_CALL:
+ case AMDGPU::G_SI_CALL: {
I.setDesc(TII.get(AMDGPU::SI_CALL));
+
+ // G_SI_CALL defines a virtual register that is copied to the return
+ // address physical register. Replace the virtual reg def with the
+ // physical register and remove the COPY.
+ Register DstReg = I.getOperand(0).getReg();
+ MCRegister RetAddrReg = TRI.getReturnAddressReg(*MF);
+ if (MRI->hasOneUse(DstReg)) {
+ MachineInstr &Copy = *MRI->use_instr_begin(DstReg);
+ if (Copy.isCopy() && Copy.getOperand(0).getReg() == RetAddrReg) {
+ Copy.eraseFromParent();
+ }
+ }
+ I.getOperand(0).setReg(RetAddrReg);
return true;
+ }
case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
return selectWaveAddress(I);
case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 5717a55a92dad..e1eb880f3cb70 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1149,10 +1149,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
addRulesForGOpcs({G_SI_CALL})
- .Any({{_, UniP0}, {{None}, {SgprP0}}})
- .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
- .Any({{_, UniP4}, {{None}, {SgprP4}}})
- .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});
+ .Any({{UniS64, UniP0}, {{Sgpr64}, {SgprP0}}})
+ .Any({{UniS64, DivP0}, {{Sgpr64}, {SgprP0Call_WF}}})
+ .Any({{UniS64, UniP4}, {{Sgpr64}, {SgprP4}}})
+ .Any({{UniS64, DivP4}, {{Sgpr64}, {SgprP4Call_WF}}});
bool hasSALUFloat = ST->hasSALUFloatInsts();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
index 493b7541cdd81..7f39a6333c85e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
@@ -109,6 +109,8 @@ class RegBankSelectHelper {
return MI->implicit_operands().begin()->getReg() == TRI.getExec();
}
+ const RegisterBank *getRegBankSgpr() const { return SgprRB; }
+
const RegisterBank *getRegBankToAssign(Register Reg) {
if (!isTemporalDivergenceCopy(Reg) &&
(MUI.isUniform(Reg) || ILMA.isS32S64LaneMask(Reg)))
@@ -189,7 +191,7 @@ static Register getVReg(MachineOperand &Op) {
if (!Op.isReg())
return {};
- // Operands of COPY and G_SI_CALL can be physical registers.
+ // Operands of COPY can be physical registers.
Register Reg = Op.getReg();
if (!Reg.isVirtual())
return {};
@@ -260,6 +262,11 @@ bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
const RegisterBank *RB = RBSHelper.getRegBankToAssign(DefReg);
+
+ // The return address defined by G_SI_CALL is always uniform.
+ if (MI.getOpcode() == AMDGPU::G_SI_CALL)
+ RB = RBSHelper.getRegBankSgpr();
+
if (MRI.getRegClassOrNull(DefReg))
RBSHelper.reAssignRegBankOnDef(MI, DefOP, RB);
else {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cde352313f86a..e08942135abdd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4793,7 +4793,7 @@ def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction {
// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
// if necessary.
def G_SI_CALL : AMDGPUGenericInstruction {
- let OutOperandList = (outs SReg_64:$dst);
+ let OutOperandList = (outs type1:$dst);
let InOperandList = (ins type0:$src0, unknown:$callee);
let Size = 4;
let isCall = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll
index edb36079fdfbe..f4af410fd1b49 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dereferenceable-declaration.ll
@@ -16,7 +16,8 @@ define i64 @load_deref_declaration_only() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -40,7 +41,8 @@ define i64 @load_deref_unknown_decl() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -64,7 +66,8 @@ define i64 @load_deref_callsite_only() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @unknown_decl
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @unknown_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -89,7 +92,8 @@ define i64 @load_deref_maxmimum_callsite_declaration_only() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -100,7 +104,8 @@ define i64 @load_deref_maxmimum_callsite_declaration_only() {
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref4
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL1:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL1]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
@@ -128,7 +133,8 @@ define i64 @load_deref_or_null_declaration_only() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -152,7 +158,8 @@ define i64 @load_deref_or_null_nonnull_decl() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -176,7 +183,8 @@ define i64 @load_deref_or_null_callsite_only() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @nonnull_decl
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @nonnull_decl, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -201,7 +209,8 @@ define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @declared_with_ret_deref_or_null, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
@@ -212,7 +221,8 @@ define i64 @load_deref_or_null_maxmimum_callsite_declaration_only() {
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @declared_with_ret_deref_or_null4
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL1:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV1]](p0), @declared_with_ret_deref_or_null4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL1]](s64)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index ca9d7854fb619..3dabdbab8962c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -57,7 +57,8 @@ define void @call_result_align_1() {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -106,7 +107,8 @@ define void @call_result_align_8() {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -156,7 +158,8 @@ define void @declaration_result_align_8() {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 02ce52b7450f6..1e191e631c86f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -40,7 +40,8 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
@@ -85,7 +86,8 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
@@ -121,7 +123,8 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
@@ -161,7 +164,8 @@ define void @func_call_no_workitem_ids() {
; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
@@ -195,7 +199,8 @@ define void @func_call_no_workgroup_ids() {
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
@@ -220,7 +225,8 @@ define void @func_call_no_other_sgprs() {
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z"
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 2bb059a0fbb1e..d0ac8f1e45a91 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -58,7 +58,8 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -111,7 +112,8 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -156,7 +158,8 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: SI_RETURN
;
@@ -197,7 +200,8 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: SI_RETURN
call void @external_void_func_i32(i32 99)
@@ -292,7 +296,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -381,7 +386,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
@@ -513,7 +519,8 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX900-NEXT: SI_RETURN
;
@@ -641,7 +648,8 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[COPY33]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX908-NEXT: SI_RETURN
call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
@@ -687,7 +695,8 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -729,7 +738,8 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -779,7 +789,8 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -825,7 +836,8 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -875,7 +887,8 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -921,7 +934,8 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -972,7 +986,8 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -1019,7 +1034,8 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -1074,7 +1090,8 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -1125,7 +1142,8 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -1176,7 +1194,8 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
;
@@ -1223,7 +1242,8 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
index 6694ad741cf48..6552c39fb3332 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -15,7 +15,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void @external_gfx_void_func_void()
@@ -34,7 +35,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
@@ -54,7 +56,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
@@ -78,7 +81,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -106,7 +110,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index bb6d1f8d161cb..2a1f1694cf390 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -118,7 +118,8 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
@@ -142,7 +143,8 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1)
; GCN-NEXT: $vgpr0 = COPY [[C]](s32)
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
@@ -200,7 +202,8 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -219,7 +222,8 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -278,7 +282,8 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 1
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
@@ -340,7 +345,8 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 1
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
@@ -402,7 +408,8 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -422,7 +429,8 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -482,7 +490,8 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 8
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
@@ -544,7 +553,8 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 8
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
@@ -606,7 +616,8 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -665,7 +676,8 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY21]], 16
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
@@ -727,7 +739,8 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY21]], 16
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
@@ -789,7 +802,8 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) poison`, addrspace 1)
@@ -807,7 +821,8 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) poison`, addrspace 1)
@@ -865,7 +880,8 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -926,7 +942,8 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -989,7 +1006,8 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1052,7 +1070,8 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1112,7 +1131,8 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1172,7 +1192,8 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1236,7 +1257,8 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) poison`, addrspace 3)
@@ -1294,7 +1316,8 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3)
@@ -1354,7 +1377,8 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1413,7 +1437,8 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) poison`, addrspace 1)
@@ -1471,7 +1496,8 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1531,7 +1557,8 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1595,7 +1622,8 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32)
@@ -1655,7 +1683,8 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1716,7 +1745,8 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1778,7 +1808,8 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1841,7 +1872,8 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1907,7 +1939,8 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1981,7 +2014,8 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2071,7 +2105,8 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) poison`, addrspace 1)
@@ -2129,7 +2164,8 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2191,7 +2227,8 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2251,7 +2288,8 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) poison`, addrspace 1)
@@ -2309,7 +2347,8 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2371,7 +2410,8 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2431,7 +2471,8 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2492,7 +2533,8 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2556,7 +2598,8 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2581,7 +2624,8 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2646,7 +2690,8 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2709,7 +2754,8 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -2796,7 +2842,8 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
@@ -2863,7 +2910,8 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
@@ -2930,7 +2978,8 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) poison`, align 8, addrspace 1)
@@ -2999,7 +3048,8 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) poison`, align 8, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 0dc9068c1984b..b79e4d5b8c9ff 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -66,7 +66,8 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = nuw nusw inbounds G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index efec8f4309013..cfb49f35f7698 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -159,7 +159,8 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_void()
@@ -173,7 +174,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void @external_gfx_void_func_void()
@@ -216,7 +218,8 @@ define void @test_func_call_external_void_func_void() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_void()
@@ -272,7 +275,8 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_empty_struct({} zeroinitializer, i32 23)
@@ -328,7 +332,8 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_empty_array([0 x i8] zeroinitializer, i32 23)
@@ -385,7 +390,8 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i1(i1 true)
@@ -444,7 +450,8 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i1, ptr addrspace(1) poison
@@ -504,7 +511,8 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i1, ptr addrspace(1) poison
@@ -564,7 +572,8 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i8(i8 123)
@@ -624,7 +633,8 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i8, ptr addrspace(1) poison
@@ -685,7 +695,8 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i8, ptr addrspace(1) poison
@@ -743,7 +754,8 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i16(i16 123)
@@ -802,7 +814,8 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i16, ptr addrspace(1) poison
@@ -862,7 +875,8 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i16, ptr addrspace(1) poison
@@ -920,7 +934,8 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -939,7 +954,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void @external_gfx_void_func_i32(i32 42)
@@ -959,7 +975,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42)
@@ -1017,7 +1034,8 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i64(i64 123)
@@ -1078,7 +1096,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i64>, ptr addrspace(1) null
@@ -1141,7 +1160,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
@@ -1202,7 +1222,8 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, ptr addrspace(1) poison
@@ -1264,7 +1285,8 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, ptr addrspace(1) poison
@@ -1326,7 +1348,8 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, ptr addrspace(1) poison
@@ -1386,7 +1409,8 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_p0(ptr %arg)
@@ -1447,7 +1471,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x ptr>, ptr addrspace(1) null
@@ -1515,7 +1540,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, ptr addrspace(1) null
@@ -1587,7 +1613,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, ptr addrspace(1) null
@@ -1646,7 +1673,8 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f16(half 4.0)
@@ -1702,7 +1730,8 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f32(float 4.0)
@@ -1762,7 +1791,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
@@ -1824,7 +1854,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
@@ -1890,7 +1921,8 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
@@ -1948,7 +1980,8 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f64(double 4.0)
@@ -2010,7 +2043,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
@@ -2075,7 +2109,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
@@ -2132,7 +2167,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i16>, ptr addrspace(1) poison
@@ -2195,7 +2231,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <3 x i16>, ptr addrspace(1) poison
@@ -2258,7 +2295,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <3 x half>, ptr addrspace(1) poison
@@ -2318,7 +2356,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <4 x i16>, ptr addrspace(1) poison
@@ -2381,7 +2420,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
@@ -2444,7 +2484,8 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <5 x i16>, ptr addrspace(1) poison
@@ -2509,7 +2550,8 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <7 x i16>, ptr addrspace(1) poison
@@ -2605,7 +2647,8 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <63 x i16>, ptr addrspace(1) poison
@@ -2704,7 +2747,8 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <65 x i16>, ptr addrspace(1) poison
@@ -2800,7 +2844,8 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <66 x i16>, ptr addrspace(1) poison
@@ -2858,7 +2903,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x half>, ptr addrspace(1) poison
@@ -2918,7 +2964,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i32>, ptr addrspace(1) poison
@@ -2979,7 +3026,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
@@ -3042,7 +3090,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
@@ -3107,7 +3156,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
@@ -3168,7 +3218,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <4 x i32>, ptr addrspace(1) poison
@@ -3233,7 +3284,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
@@ -3299,7 +3351,8 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
@@ -3365,7 +3418,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -3439,7 +3493,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
@@ -3513,7 +3568,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -3608,7 +3664,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -3709,7 +3766,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -3819,7 +3877,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -3927,7 +3986,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -3995,7 +4055,8 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4021,7 +4082,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4049,7 +4111,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
%ptr0 = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4117,7 +4180,8 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = alloca { i8, i32 }, align 4, addrspace(5)
@@ -4180,7 +4244,8 @@ define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %incoming0, ptr addrspace(5) align 32 %incoming1, i32 999)
@@ -4233,7 +4298,8 @@ define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %incoming_high_align)
@@ -4297,7 +4363,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4366,7 +4433,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4438,7 +4506,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4522,7 +4591,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4630,7 +4700,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load ptr addrspace(1), ptr addrspace(4) poison
@@ -4734,7 +4805,8 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
entry:
@@ -4865,7 +4937,8 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -5008,7 +5081,8 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -5155,7 +5229,8 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -5298,7 +5373,8 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -5341,7 +5417,8 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() {
; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
main_body:
@@ -5390,7 +5467,8 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_i16_inreg(i16 inreg %arg)
@@ -5436,7 +5514,8 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_i32_inreg(i32 inreg %arg)
@@ -5487,7 +5566,8 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_i64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_i64_inreg(i64 inreg %arg)
@@ -5538,7 +5618,8 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2i32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v2i32_inreg(<2 x i32> inreg %arg)
@@ -5586,7 +5667,8 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_f16_inreg(half inreg %arg)
@@ -5646,7 +5728,8 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f32_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_f32_inreg(float inreg %arg)
@@ -5697,7 +5780,8 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_f64_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_f64_inreg(double inreg %arg)
@@ -5744,7 +5828,8 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v2f16_inreg(<2 x half> inreg %arg)
@@ -5802,7 +5887,8 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v3f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v3f16_inreg(<3 x half> inreg %arg)
@@ -5855,7 +5941,8 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v4f16_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v4f16_inreg(<4 x half> inreg %arg)
@@ -5906,7 +5993,8 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 {
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p0_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_p0_inreg(ptr inreg %arg)
@@ -5957,7 +6045,8 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_p1_inreg(ptr addrspace(1) inreg %arg)
@@ -6004,7 +6093,8 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p3_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_p3_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_p3_inreg(ptr addrspace(3) inreg %arg)
@@ -6063,7 +6153,8 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre
; CHECK-NEXT: $sgpr14 = COPY [[COPY19]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY20]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY21]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p1_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg)
@@ -6114,7 +6205,8 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p5_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_v2p5_inreg, csr_amdgpu, implicit $sgpr0, implicit $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index 5dc6932837d79..fe32646395794 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -49,7 +49,8 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void %fptr()
@@ -67,7 +68,8 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) {
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call amdgpu_gfx void %fptr()
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 24e85aa280bf6..c96dfb2a4aaa1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -149,7 +149,8 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
; GCN-NEXT: $vgpr1 = COPY [[EVEC1]](s32)
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
@@ -195,7 +196,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, pt
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32)
@@ -558,7 +560,8 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: $vgpr30 = COPY [[C]](s32)
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; GCN-NEXT: $vgpr0 = COPY [[COPY3]](s32)
@@ -583,7 +586,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = noconvergent G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
index 5207d992ea74d..1e4d91c95ceb2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-call.mir
@@ -33,7 +33,8 @@ body: |
; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
- ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu
+ ; CHECK-NEXT: %ret_addr:sgpr(s64) = G_SI_CALL [[MV]](p0), 0, csr_amdgpu
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY %ret_addr(s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
@@ -44,13 +45,14 @@ body: |
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
- ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p0) = COPY $sgpr0_sgpr1
%func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
- $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ %ret_addr:_(s64) = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ $sgpr30_sgpr31 = COPY %ret_addr
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- S_SETPC_B64_return undef $sgpr2_sgpr3
+ S_SETPC_B64_return undef $sgpr30_sgpr31
...
@@ -86,7 +88,8 @@ body: |
; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
- ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu
+ ; CHECK-NEXT: %ret_addr:sgpr(s64) = G_SI_CALL [[MV]](p4), 0, csr_amdgpu
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY %ret_addr(s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
@@ -97,13 +100,14 @@ body: |
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
- ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p4) = COPY $sgpr0_sgpr1
%func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
- $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ %ret_addr:_(s64) = G_SI_CALL %func_ptr, 0, csr_amdgpu
+ $sgpr30_sgpr31 = COPY %ret_addr
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- S_SETPC_B64_return undef $sgpr2_sgpr3
+ S_SETPC_B64_return undef $sgpr30_sgpr31
...
@@ -139,7 +143,8 @@ body: |
; CHECK-NEXT: %g_ptr:sgpr(p0) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p0) = COPY %g_ptr(p0)
; CHECK-NEXT: %func_ptr:vgpr(p0) = G_LOAD [[COPY]](p0) :: (load (p0))
- ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: %ret_addr:sgpr(s64) = G_SI_CALL [[MV]](p0), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY %ret_addr(s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
@@ -150,13 +155,14 @@ body: |
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
- ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p0) = COPY $sgpr0_sgpr1
%func_ptr:_(p0) = G_LOAD %g_ptr(p0) :: (load (p0))
- $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ %ret_addr:_(s64) = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ $sgpr30_sgpr31 = COPY %ret_addr
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- S_SETPC_B64_return undef $sgpr2_sgpr3
+ S_SETPC_B64_return undef $sgpr30_sgpr31
...
@@ -192,7 +198,8 @@ body: |
; CHECK-NEXT: %g_ptr:sgpr(p4) = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %g_ptr(p4)
; CHECK-NEXT: %func_ptr:vgpr(p4) = G_LOAD [[COPY]](p4) :: (load (p4))
- ; CHECK-NEXT: $sgpr2_sgpr3 = G_SI_CALL [[MV]](p4), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: %ret_addr:sgpr(s64) = G_SI_CALL [[MV]](p4), 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY %ret_addr(s64)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
@@ -203,13 +210,14 @@ body: |
; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: .4:
- ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr2_sgpr3
+ ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31
ADJCALLSTACKUP 0, 0, implicit-def $scc
%g_ptr:_(p4) = COPY $sgpr0_sgpr1
%func_ptr:_(p4) = G_LOAD %g_ptr(p4) :: (load (p4))
- $sgpr2_sgpr3 = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ %ret_addr:_(s64) = G_SI_CALL %func_ptr, 0, csr_amdgpu, implicit $sgpr4, implicit $sgpr5, implicit-def $vgpr0
+ $sgpr30_sgpr31 = COPY %ret_addr
ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- S_SETPC_B64_return undef $sgpr2_sgpr3
+ S_SETPC_B64_return undef $sgpr30_sgpr31
...
diff --git a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
index 38610d8b4c410..66e528c5b6247 100644
--- a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
@@ -118,7 +118,8 @@ define amdgpu_cs void @call(i32 %x, ptr %p) {
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
+ ; CHECK-NEXT: [[SI_CALL:%[0-9]+]]:_(s64) = G_SI_CALL [[GV1]](p0), @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
+ ; CHECK-NEXT: $sgpr30_sgpr31 = COPY [[SI_CALL]](s64)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: G_STORE [[COPY3]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
More information about the llvm-commits
mailing list