[llvm] 3a20597 - [amdgpu] Implement lds kernel id intrinsic
Jon Chesterfield via llvm-commits
llvm-commits@lists.llvm.org
Tue Jul 19 09:46:32 PDT 2022
Author: Jon Chesterfield
Date: 2022-07-19T17:46:19+01:00
New Revision: 3a20597776a5d2920e511d81653b4d2b6ca0c855
URL: https://github.com/llvm/llvm-project/commit/3a20597776a5d2920e511d81653b4d2b6ca0c855
DIFF: https://github.com/llvm/llvm-project/commit/3a20597776a5d2920e511d81653b4d2b6ca0c855.diff
LOG: [amdgpu] Implement lds kernel id intrinsic
Implement an intrinsic for use in lowering LDS variables to different
addresses from different kernels. This will allow kernels that cannot
reach an LDS variable to avoid wasting space for it.

A number of implicit arguments are already accessed through intrinsics,
so this implementation closely follows the existing handling. It is
slightly novel in that this SGPR is written by the kernel prologue.
In the general case it is necessary to put variables at different addresses
so that they can be allocated compactly, and an indirect function call
therefore needs some means of determining where a given variable was
allocated. Claiming an arbitrary SGPR, writing an integer into it from the
kernel (in this implementation based on metadata associated with that
kernel), and passing that value on to indirect call sites is sufficient to
determine the variable address.
The intent is to emit a __const array of LDS addresses and index into it.
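A minimal IR sketch of the intended usage (the offset table, kernel and
offset values below are illustrative only, not part of this patch): a
lowering pass tags each kernel with !llvm.amdgcn.lds.kernel.id metadata and
emits a table of per-kernel LDS addresses; functions reached from that
kernel read the id via the intrinsic and index the table.

  declare i32 @llvm.amdgcn.lds.kernel.id()

  ; Hypothetical per-kernel offset table in the constant address space.
  @lds.offset.table = addrspace(4) constant [2 x i32] [i32 0, i32 256]

  define void @use_lds_indirectly() {
    ; In non-kernel functions the id arrives in the SGPR written by the
    ; kernel prologue and claimed for LDS_KERNEL_ID.
    %id = call i32 @llvm.amdgcn.lds.kernel.id()
    %slot = getelementptr inbounds [2 x i32], [2 x i32] addrspace(4)* @lds.offset.table, i32 0, i32 %id
    %offset = load i32, i32 addrspace(4)* %slot
    ; ... use %offset to address this kernel's LDS allocation ...
    ret void
  }

  define amdgpu_kernel void @kernel_a() !llvm.amdgcn.lds.kernel.id !0 {
    ; In a kernel the intrinsic folds to the metadata constant.
    call void @use_lds_indirectly()
    ret void
  }

  !0 = !{i32 0}
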
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D125060
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.h
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
llvm/test/CodeGen/AMDGPU/indirect-call.ll
llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 3871175351207..93925a84c8e8a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -167,6 +167,10 @@ def int_amdgcn_dispatch_id :
ClangBuiltin<"__builtin_amdgcn_dispatch_id">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+// For internal use. Coordinates LDS lowering between IR transform and backend.
+def int_amdgcn_lds_kernel_id :
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
def int_amdgcn_implicit_buffer_ptr :
ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
index d28f38e424306..d361e33995cf4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp
@@ -74,6 +74,7 @@ void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
<< " WorkGroupIDY: " << FI.second.WorkGroupIDY
<< " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
<< " WorkGroupInfo: " << FI.second.WorkGroupInfo
+ << " LDSKernelId: " << FI.second.LDSKernelId
<< " PrivateSegmentWaveByteOffset: "
<< FI.second.PrivateSegmentWaveByteOffset
<< " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
@@ -107,6 +108,9 @@ AMDGPUFunctionArgInfo::getPreloadedValue(
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
return std::make_tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
+ case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
+ return std::make_tuple(LDSKernelId ? &LDSKernelId : nullptr,
+ &AMDGPU::SGPR_32RegClass, LLT::scalar(32));
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return std::make_tuple(
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
@@ -162,6 +166,7 @@ constexpr AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
+ AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15);
const unsigned Mask = 0x3ff;
AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index e9ed45d8cd141..f595e469f9984 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -103,6 +103,7 @@ struct AMDGPUFunctionArgInfo {
KERNARG_SEGMENT_PTR = 3,
DISPATCH_ID = 4,
FLAT_SCRATCH_INIT = 5,
+ LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI
WORKGROUP_ID_X = 10,
WORKGROUP_ID_Y = 11,
WORKGROUP_ID_Z = 12,
@@ -128,6 +129,7 @@ struct AMDGPUFunctionArgInfo {
ArgDescriptor DispatchID;
ArgDescriptor FlatScratchInit;
ArgDescriptor PrivateSegmentSize;
+ ArgDescriptor LDSKernelId;
// System SGPRs in kernels.
ArgDescriptor WorkGroupIDX;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
index 0a2cf38742457..c7a060c5db5b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -27,5 +27,6 @@ AMDGPU_ATTRIBUTE(WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z")
AMDGPU_ATTRIBUTE(WORKITEM_ID_X, "amdgpu-no-workitem-id-x")
AMDGPU_ATTRIBUTE(WORKITEM_ID_Y, "amdgpu-no-workitem-id-y")
AMDGPU_ATTRIBUTE(WORKITEM_ID_Z, "amdgpu-no-workitem-id-z")
+AMDGPU_ATTRIBUTE(LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id")
#undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 8de0d7e6bff13..82bf7ab1376f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -72,6 +72,8 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
return WORKGROUP_ID_Z;
+ case Intrinsic::amdgcn_lds_kernel_id:
+ return LDS_KERNEL_ID;
case Intrinsic::amdgcn_dispatch_ptr:
return DISPATCH_PTR;
case Intrinsic::amdgcn_dispatch_id:
@@ -457,6 +459,10 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
removeAssumedBits(QUEUE_PTR);
}
+ if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
+ removeAssumedBits(LDS_KERNEL_ID);
+ }
+
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
@@ -591,6 +597,16 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
UsedAssumedInformation);
}
+
+ bool funcRetrievesLDSKernelId(Attributor &A) {
+ auto DoesNotRetrieve = [&](Instruction &I) {
+ auto &Call = cast<CallBase>(I);
+ return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
+ };
+ bool UsedAssumedInformation = false;
+ return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
+ UsedAssumedInformation);
+ }
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index fd812eb676ef6..484e195c055b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -764,7 +764,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
AMDGPUFunctionArgInfo::DISPATCH_ID,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+ AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID,
};
static constexpr StringLiteral ImplicitAttrNames[] = {
@@ -774,7 +775,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
"amdgpu-no-dispatch-id",
"amdgpu-no-workgroup-id-x",
"amdgpu-no-workgroup-id-y",
- "amdgpu-no-workgroup-id-z"
+ "amdgpu-no-workgroup-id-z",
+ "amdgpu-no-lds-kernel-id",
};
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -810,6 +812,14 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
LI->loadInputValue(InputReg, MIRBuilder, IncomingArg, ArgRC, ArgTy);
} else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
LI->getImplicitArgPtr(InputReg, MRI, MIRBuilder);
+ } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
+ Optional<uint32_t> Id =
+ AMDGPUMachineFunction::getLDSKernelIdMetadata(MF.getFunction());
+ if (Id.hasValue()) {
+ MIRBuilder.buildConstant(InputReg, Id.getValue());
+ } else {
+ MIRBuilder.buildUndef(InputReg);
+ }
} else {
// We may have proven the input wasn't needed, although the ABI is
// requiring it. We just need to allocate the register appropriately.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 01a3e78ea48cd..7262b78e426cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4197,6 +4197,35 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::getLDSKernelId(Register DstReg,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Function &F = B.getMF().getFunction();
+ Optional<uint32_t> KnownSize =
+ AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
+ if (KnownSize.hasValue())
+ B.buildConstant(DstReg, KnownSize.getValue());
+ return false;
+}
+
+bool AMDGPULegalizerInfo::legalizeLDSKernelId(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (!MFI->isEntryFunction()) {
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!getLDSKernelId(DstReg, MRI, B))
+ return false;
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
@@ -5636,6 +5665,9 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_workgroup_id_z:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_lds_kernel_id:
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
case Intrinsic::amdgcn_dispatch_ptr:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::DISPATCH_PTR);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index cee533aa34ecc..5e8111e22aadc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -155,6 +155,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+
+ bool getLDSKernelId(Register DstReg, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
+ bool legalizeLDSKernelId(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, unsigned AddrSpace) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index b461c3c4bfdcd..f5e12fd960d0b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -11,6 +11,7 @@
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -101,6 +102,21 @@ void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
}
}
+Optional<uint32_t>
+AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
+ auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+ if (MD && MD->getNumOperands() == 1) {
+ ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ if (KnownSize) {
+ uint64_t V = KnownSize->getZExtValue();
+ if (V <= UINT32_MAX) {
+ return V;
+ }
+ }
+ }
+ return {};
+}
+
void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
const GlobalVariable &GV) {
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index df62c2314617a..97db8b7eb8d6b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -11,11 +11,12 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Function.h"
namespace llvm {
@@ -104,6 +105,8 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV);
void allocateModuleLDSGlobal(const Function &F);
+ static Optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
+
Align getDynLDSAlign() const { return DynLDSAlign; }
void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index e5d2e16f32388..cf4826d81b4bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -736,13 +736,18 @@ static unsigned getMaxNumPreloadedSGPRs() {
2 + // dispatch ID
2 + // flat scratch init
2; // Implicit buffer ptr
+
// Max number of system SGPRs
unsigned MaxSystemSGPRs = 1 + // WorkGroupIDX
1 + // WorkGroupIDY
1 + // WorkGroupIDZ
1 + // WorkGroupInfo
1; // private segment wave byte offset
- return MaxUserSGPRs + MaxSystemSGPRs;
+
+ // Max number of synthetic SGPRs
+ unsigned SyntheticSGPRs = 1; // LDSKernelId
+
+ return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
}
unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 971e447237580..dca9268673001 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1584,6 +1584,9 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
AMDGPU::SGPR_32RegClass,
MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
+ parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,
+ AMDGPU::SGPR_32RegClass,
+ MFI->ArgInfo.LDSKernelId, 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
0, 1) ||
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d16da2a8b86bd..35129a5338ba8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1664,6 +1664,17 @@ SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
}
+SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG,
+ const SDLoc &SL) const {
+
+ Function &F = DAG.getMachineFunction().getFunction();
+ Optional<uint32_t> KnownSize =
+ AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
+ if (KnownSize.hasValue())
+ return DAG.getConstant(KnownSize.getValue(), SL, MVT::i32);
+ return SDValue();
+}
+
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
@@ -2049,6 +2060,9 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
+
+ if (Info.hasLDSKernelId())
+ allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId);
}
// Allocate special inputs passed in user SGPRs.
@@ -2102,6 +2116,12 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(FlatScratchInitReg);
}
+ if (Info.hasLDSKernelId()) {
+ Register Reg = Info.addLDSKernelId();
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
@@ -2347,8 +2367,8 @@ SDValue SITargetLowering::LowerFormalArguments(
(!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
+ !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
}
if (CallConv == CallingConv::AMDGPU_PS) {
@@ -2762,7 +2782,8 @@ void SITargetLowering::passSpecialInputs(
{AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,"amdgpu-no-workgroup-id-y"},
- {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"}
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z,"amdgpu-no-workgroup-id-z"},
+ {AMDGPUFunctionArgInfo::LDS_KERNEL_ID,"amdgpu-no-lds-kernel-id"},
};
for (auto Attr : ImplicitAttrs) {
@@ -2798,6 +2819,13 @@ void SITargetLowering::passSpecialInputs(
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
InputReg = getImplicitArgPtr(DAG, DL);
+ } else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
+ Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
+ if (Id.hasValue()) {
+ InputReg = DAG.getConstant(Id.getValue(), DL, ArgVT);
+ } else {
+ InputReg = DAG.getUNDEF(ArgVT);
+ }
} else {
// We may have proven the input wasn't needed, although the ABI is
// requiring it. We just need to allocate the register appropriately.
@@ -6887,6 +6915,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
+ case Intrinsic::amdgcn_lds_kernel_id: {
+ if (MFI->isEntryFunction())
+ return getLDSKernelId(DAG, DL);
+ return getPreloadedValue(DAG, *MFI, VT,
+ AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
+ }
case Intrinsic::amdgcn_workitem_id_x:
return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 4fbccf0c58502..d1fecc1afc7fe 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -48,6 +48,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
+ SDValue getLDSKernelId(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
uint64_t Offset, Align Alignment,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 0504c59ebd9e2..9176e85568ee0 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -44,6 +44,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDY(false),
WorkGroupIDZ(false),
WorkGroupInfo(false),
+ LDSKernelId(false),
PrivateSegmentWaveByteOffset(false),
WorkItemIDX(false),
WorkItemIDY(false),
@@ -143,6 +144,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
DispatchID = true;
+
+ if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
+ LDSKernelId = true;
}
// FIXME: This attribute is a hack, we just need an analysis on the function
@@ -261,6 +265,12 @@ Register SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI)
return ArgInfo.ImplicitBufferPtr.getRegister();
}
+Register SIMachineFunctionInfo::addLDSKernelId() {
+ ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
+ NumUserSGPRs += 1;
+ return ArgInfo.LDSKernelId.getRegister();
+}
+
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
MCPhysReg Reg) {
for (unsigned I = 0; CSRegs[I]; ++I) {
@@ -561,6 +571,7 @@ convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
+ Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index bebb13cbf09f4..5105587617fd1 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -191,6 +191,7 @@ struct SIArgumentInfo {
Optional<SIArgument> WorkGroupIDY;
Optional<SIArgument> WorkGroupIDZ;
Optional<SIArgument> WorkGroupInfo;
+ Optional<SIArgument> LDSKernelId;
Optional<SIArgument> PrivateSegmentWaveByteOffset;
Optional<SIArgument> ImplicitArgPtr;
@@ -215,6 +216,7 @@ template <> struct MappingTraits<SIArgumentInfo> {
YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
+ YamlIO.mapOptional("LDSKernelId", AI.LDSKernelId);
YamlIO.mapOptional("privateSegmentWaveByteOffset",
AI.PrivateSegmentWaveByteOffset);
@@ -418,6 +420,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
bool WorkGroupIDY : 1;
bool WorkGroupIDZ : 1;
bool WorkGroupInfo : 1;
+ bool LDSKernelId : 1;
bool PrivateSegmentWaveByteOffset : 1;
bool WorkItemIDX : 1; // Always initialized.
@@ -608,6 +611,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
Register addDispatchID(const SIRegisterInfo &TRI);
Register addFlatScratchInit(const SIRegisterInfo &TRI);
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
+ Register addLDSKernelId();
/// Increment user SGPRs used for padding the argument list only.
Register addReservedUserSGPR() {
@@ -705,6 +709,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
return WorkGroupInfo;
}
+ bool hasLDSKernelId() const { return LDSKernelId; }
+
bool hasPrivateSegmentWaveByteOffset() const {
return PrivateSegmentWaveByteOffset;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 1aa25c4b9c372..54fc4ddd72ff6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -251,10 +251,10 @@ define void @func_caller_stack() {
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12
; MUBUF-NEXT: v_mov_b32_e32 v0, 12
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
@@ -284,10 +284,10 @@ define void @func_caller_stack() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:12
; FLATSCR-NEXT: v_mov_b32_e32 v0, 12
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
+; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:16
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
-; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:16
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
index 34007a75bb0f8..649d9a4d5966d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
@@ -26,6 +26,7 @@ define amdgpu_kernel void @call_debug_loc() {
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !6
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !6
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !6
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !6
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10
; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !6
@@ -42,10 +43,11 @@ define amdgpu_kernel void @call_debug_loc() {
; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !6
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !6
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !6
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !6
; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !6
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def $scc, debug-location !6
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !6 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, debug-location !6
+ ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !6
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !6
; CHECK-NEXT: S_ENDPGM 0
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
index 001589cd1046f..aa19fc8b26fca 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -23,41 +23,44 @@ declare align 8 i8 addrspace(1)* @returns_ptr_align8()
define void @call_result_align_1() {
; CHECK-LABEL: name: call_result_align_1
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY17]](s32), [[COPY18]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1)
; CHECK-NEXT: SI_RETURN
@@ -69,41 +72,44 @@ define void @call_result_align_1() {
define void @call_result_align_8() {
; CHECK-LABEL: name: call_result_align_8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY17]](s32), [[COPY18]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8
; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1)
@@ -116,41 +122,44 @@ define void @call_result_align_8() {
define void @declaration_result_align_8() {
; CHECK-LABEL: name: declaration_result_align_8
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY17]](s32), [[COPY18]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8
; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1)
@@ -163,36 +172,39 @@ define void @declaration_result_align_8() {
define i8 addrspace(1)* @tail_call_assert_align() {
; CHECK-LABEL: name: tail_call_assert_align
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%call = tail call i8 addrspace(1)* @returns_ptr_align8()
ret i8 addrspace(1)* %call
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index 7ea242244c43f..545580f33c74f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -29,6 +29,7 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
@@ -38,7 +39,8 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
@@ -65,6 +67,7 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY9]], [[C]](s64)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -80,8 +83,9 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -102,6 +106,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY3]](p4)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -114,8 +119,9 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY8]](<4 x s32>)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -125,34 +131,37 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
define void @func_call_no_workitem_ids() {
; CHECK-LABEL: name: func_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
@@ -162,28 +171,31 @@ define void @func_call_no_workitem_ids() {
define void @func_call_no_workgroup_ids() {
; CHECK-LABEL: name: func_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY5]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY9]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY10]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -193,19 +205,22 @@ define void @func_call_no_workgroup_ids() {
define void @func_call_no_other_sgprs() {
; CHECK-LABEL: name: func_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9
+ ; CHECK-NEXT: liveins: $sgpr15, $vgpr31, $sgpr8_sgpr9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY2]](p4)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY3]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 0447b6cd064f6..cfc2fc0dc439a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -36,6 +36,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -55,8 +56,9 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_call_external_void_func_i32
@@ -86,6 +88,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -105,8 +108,9 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -116,76 +120,82 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
define void @test_func_call_external_void_func_i32() #0 {
; GFX900-LABEL: name: test_func_call_external_void_func_i32
; GFX900: bb.1 (%ir-block.0):
- ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: SI_RETURN
; GFX908-LABEL: name: test_func_call_external_void_func_i32
; GFX908: bb.1 (%ir-block.0):
- ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: SI_RETURN
call void @external_void_func_i32(i32 99)
@@ -223,6 +233,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -277,8 +288,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_call_external_void_func_v32i32
@@ -309,6 +321,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -363,8 +376,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
@@ -374,79 +388,81 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-LABEL: name: test_func_call_external_void_func_v32i32
; GFX900: bb.1 (%ir-block.1):
- ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+ ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GFX900-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX900-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX900-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX900-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX900-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
- ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
+ ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX900-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX900-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
- ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
+ ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX900-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX900-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
- ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
+ ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX900-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX900-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
- ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX900-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
; GFX900-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
- ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
+ ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX900-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
; GFX900-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16)
- ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
+ ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX900-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
; GFX900-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16)
- ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
+ ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX900-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
; GFX900-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16)
- ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
+ ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX900-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
; GFX900-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16)
- ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
+ ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX900-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
; GFX900-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16)
- ; GFX900-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
+ ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX900-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GFX900-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16)
- ; GFX900-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX900-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
; GFX900-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16)
- ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+ ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX900-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GFX900-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16)
- ; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX900-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
; GFX900-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16)
- ; GFX900-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+ ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX900-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
; GFX900-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16)
- ; GFX900-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+ ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX900-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
; GFX900-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16)
- ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
+ ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32)
; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
; GFX900-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -483,94 +499,97 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY33]](<4 x s32>)
- ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4)
- ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4)
- ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4)
- ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64)
- ; GFX900-NEXT: $sgpr12 = COPY [[COPY29]](s32)
- ; GFX900-NEXT: $sgpr13 = COPY [[COPY30]](s32)
- ; GFX900-NEXT: $sgpr14 = COPY [[COPY31]](s32)
- ; GFX900-NEXT: $vgpr31 = COPY [[COPY32]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
+ ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
+ ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
+ ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
+ ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64)
+ ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32)
+ ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32)
+ ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[COPY33]](s32)
+ ; GFX900-NEXT: $vgpr31 = COPY [[COPY34]](s32)
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX900-NEXT: SI_RETURN
; GFX908-LABEL: name: test_func_call_external_void_func_v32i32
; GFX908: bb.1 (%ir-block.1):
- ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+ ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX908-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GFX908-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
- ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
+ ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX908-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
; GFX908-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16)
- ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
+ ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX908-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
; GFX908-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC4]](s16)
- ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32)
+ ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX908-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
; GFX908-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC6]](s16)
- ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+ ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX908-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
; GFX908-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16)
- ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32)
+ ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX908-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
; GFX908-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC10]](s16)
- ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32)
+ ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX908-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
; GFX908-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC12]](s16)
- ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32)
+ ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX908-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
; GFX908-NEXT: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC14]](s16)
- ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32)
+ ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX908-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
; GFX908-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC16]](s16)
- ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32)
+ ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX908-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
; GFX908-NEXT: [[TRUNC19:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC18]](s16)
- ; GFX908-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32)
+ ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX908-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GFX908-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC20]](s16)
- ; GFX908-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
+ ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX908-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
; GFX908-NEXT: [[TRUNC23:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC22]](s16)
- ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32)
+ ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX908-NEXT: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GFX908-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC24]](s16)
- ; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
+ ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX908-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
; GFX908-NEXT: [[TRUNC27:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC26]](s16)
- ; GFX908-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY22]](s32)
+ ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX908-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
; GFX908-NEXT: [[TRUNC29:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC28]](s16)
- ; GFX908-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY23]](s32)
+ ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX908-NEXT: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
; GFX908-NEXT: [[TRUNC31:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC30]](s16)
- ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32)
+ ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32)
; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16)
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
- ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
; GFX908-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -607,17 +626,18 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY33]](<4 x s32>)
- ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4)
- ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4)
- ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4)
- ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64)
- ; GFX908-NEXT: $sgpr12 = COPY [[COPY29]](s32)
- ; GFX908-NEXT: $sgpr13 = COPY [[COPY30]](s32)
- ; GFX908-NEXT: $sgpr14 = COPY [[COPY31]](s32)
- ; GFX908-NEXT: $vgpr31 = COPY [[COPY32]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
+ ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
+ ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4)
+ ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4)
+ ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64)
+ ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32)
+ ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32)
+ ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[COPY33]](s32)
+ ; GFX908-NEXT: $vgpr31 = COPY [[COPY34]](s32)
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX908-NEXT: SI_RETURN
call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
@@ -649,6 +669,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: $vgpr0 = COPY [[C]](s32)
; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
@@ -660,8 +681,9 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_x
@@ -688,6 +710,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: $vgpr0 = COPY [[C]](s32)
; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
@@ -699,8 +722,9 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -732,6 +756,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -747,8 +772,9 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_y
@@ -775,6 +801,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -790,8 +817,9 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -823,6 +851,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -838,8 +867,9 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_z
@@ -866,6 +896,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -881,8 +912,9 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -915,6 +947,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -930,8 +963,9 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_xy
@@ -959,6 +993,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -974,8 +1009,9 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -1008,6 +1044,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1027,8 +1064,9 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_yz
@@ -1056,6 +1094,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1075,8 +1114,9 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -1109,6 +1149,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX900-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -1124,8 +1165,9 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX900-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_xz
@@ -1153,6 +1195,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GFX908-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
@@ -1168,8 +1211,9 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32)
; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; GFX908-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 346f42763be4c..bcb82d07ccdee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -96,6 +96,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -115,8 +116,9 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
@@ -177,6 +179,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -195,8 +198,9 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -253,6 +257,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -271,8 +276,9 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -312,6 +318,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -330,8 +337,9 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -371,6 +379,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -389,8 +398,9 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -449,6 +459,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -467,8 +478,9 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -509,6 +521,7 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -527,8 +540,9 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -569,6 +583,7 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -587,8 +602,9 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -626,6 +642,7 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -644,8 +661,9 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -685,6 +703,7 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -703,8 +722,9 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -744,6 +764,7 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -762,8 +783,9 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -818,6 +840,7 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -836,8 +859,9 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -877,6 +901,7 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -895,8 +920,9 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -938,6 +964,7 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -956,8 +983,9 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -999,6 +1027,7 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1017,8 +1046,9 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1057,6 +1087,7 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1075,8 +1106,9 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1115,6 +1147,7 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1133,8 +1166,9 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1177,6 +1211,7 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1195,8 +1230,9 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3)
@@ -1233,6 +1269,7 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1251,8 +1288,9 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3)
@@ -1291,6 +1329,7 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1309,8 +1348,9 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1348,6 +1388,7 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1366,8 +1407,9 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1)
@@ -1404,6 +1446,7 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1422,8 +1465,9 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32)
@@ -1462,6 +1506,7 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1480,8 +1525,9 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1524,6 +1570,7 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1542,8 +1589,9 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32)
@@ -1582,6 +1630,7 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1600,8 +1649,9 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1641,6 +1691,7 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1659,8 +1710,9 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1701,6 +1753,7 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1719,8 +1772,9 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1762,6 +1816,7 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1780,8 +1835,9 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1826,6 +1882,7 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1844,8 +1901,9 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1898,6 +1956,7 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1916,8 +1975,9 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1986,6 +2046,7 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2004,8 +2065,9 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
@@ -2042,6 +2104,7 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2060,8 +2123,9 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2102,6 +2166,7 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2120,8 +2185,9 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2160,6 +2226,7 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2178,8 +2245,9 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
@@ -2216,6 +2284,7 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2234,8 +2303,9 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2276,6 +2346,7 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2294,8 +2365,9 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
@@ -2334,6 +2406,7 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2352,8 +2425,9 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2393,6 +2467,7 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2411,8 +2486,9 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2456,6 +2532,7 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2474,8 +2551,9 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2545,6 +2623,7 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2563,8 +2642,9 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2606,6 +2686,7 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2624,8 +2705,9 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -2691,6 +2773,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2710,8 +2793,9 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
@@ -2757,6 +2841,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2776,8 +2861,9 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
@@ -2822,6 +2908,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2841,8 +2928,9 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
@@ -2885,6 +2973,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2908,8 +2997,9 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index f808695f0129b..b32e8211f05e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -40,6 +40,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -64,8 +65,9 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 2679c1f067259..0d76e29939d47 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -120,6 +120,7 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -138,8 +139,9 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_void()
@@ -163,37 +165,40 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
define void @test_func_call_external_void_func_void() #0 {
; CHECK-LABEL: name: test_func_call_external_void_func_void
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @external_void_func_void()
@@ -227,6 +232,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -246,8 +252,9 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_empty_struct({} zeroinitializer, i32 23)
@@ -281,6 +288,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -300,8 +308,9 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_empty_array([0 x i8] zeroinitializer, i32 23)
@@ -335,6 +344,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -355,8 +365,9 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i1(i1 true)
@@ -392,6 +403,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -412,8 +424,9 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i1, i1 addrspace(1)* undef
@@ -450,6 +463,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -470,8 +484,9 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i1, i1 addrspace(1)* undef
@@ -507,6 +522,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -528,8 +544,9 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i8(i8 123)
@@ -565,6 +582,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -586,8 +604,9 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i8, i8 addrspace(1)* undef
@@ -624,6 +643,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -645,8 +665,9 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i8, i8 addrspace(1)* undef
@@ -681,6 +702,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -701,8 +723,9 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i16(i16 123)
@@ -738,6 +761,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -758,8 +782,9 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i16, i16 addrspace(1)* undef
@@ -796,6 +821,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -816,8 +842,9 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i16, i16 addrspace(1)* undef
@@ -853,6 +880,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -872,8 +900,9 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -945,6 +974,7 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -966,8 +996,9 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i64(i64 123)
@@ -1002,6 +1033,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1025,8 +1057,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i64>, <2 x i64> addrspace(1)* null
@@ -1063,6 +1096,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1086,8 +1120,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
@@ -1123,6 +1158,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1145,8 +1181,9 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, i48 addrspace(1)* undef
@@ -1183,6 +1220,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1205,8 +1243,9 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, i48 addrspace(1)* undef
@@ -1243,6 +1282,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1265,8 +1305,9 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, i48 addrspace(1)* undef
@@ -1302,6 +1343,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1323,8 +1365,9 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_p0(i8* %arg)
@@ -1359,6 +1402,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1382,8 +1426,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i8*>, <2 x i8*> addrspace(1)* null
@@ -1423,6 +1468,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1448,8 +1494,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, <2 x i64> addrspace(1)* null
@@ -1491,6 +1538,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1518,8 +1566,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, <2 x i64> addrspace(1)* null
@@ -1555,6 +1604,7 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1575,8 +1625,9 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f16(half 4.0)
@@ -1610,6 +1661,7 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1629,8 +1681,9 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f32(float 4.0)
@@ -1666,6 +1719,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1687,8 +1741,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
@@ -1725,6 +1780,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1747,8 +1803,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
@@ -1787,6 +1844,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1811,8 +1869,9 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
@@ -1846,6 +1905,7 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1867,8 +1927,9 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f64(double 4.0)
@@ -1904,6 +1965,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1927,8 +1989,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
@@ -1965,6 +2028,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -1990,8 +2054,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
@@ -2026,6 +2091,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2045,8 +2111,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i16>, <2 x i16> addrspace(1)* undef
@@ -2082,6 +2149,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2092,8 +2160,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
@@ -2106,8 +2174,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <3 x i16>, <3 x i16> addrspace(1)* undef
@@ -2143,6 +2212,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2153,8 +2223,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>)
@@ -2167,8 +2237,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <3 x half>, <3 x half> addrspace(1)* undef
@@ -2204,6 +2275,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2225,8 +2297,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <4 x i16>, <4 x i16> addrspace(1)* undef
@@ -2265,6 +2338,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2286,8 +2360,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
@@ -2322,6 +2397,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2332,8 +2408,8 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>)
@@ -2347,8 +2423,9 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <5 x i16>, <5 x i16> addrspace(1)* undef
@@ -2384,6 +2461,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2394,8 +2472,8 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>)
; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
@@ -2410,8 +2488,9 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <7 x i16>, <7 x i16> addrspace(1)* undef
@@ -2447,6 +2526,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2457,8 +2537,8 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -2504,8 +2584,9 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <63 x i16>, <63 x i16> addrspace(1)* undef
@@ -2541,6 +2622,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2551,8 +2633,8 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>)
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16)
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF2]](s16)
; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -2601,8 +2683,9 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <65 x i16>, <65 x i16> addrspace(1)* undef
@@ -2638,6 +2721,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2695,8 +2779,9 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <66 x i16>, <66 x i16> addrspace(1)* undef
@@ -2732,6 +2817,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2751,8 +2837,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x half>, <2 x half> addrspace(1)* undef
@@ -2788,6 +2875,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2809,8 +2897,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i32>, <2 x i32> addrspace(1)* undef
@@ -2847,6 +2936,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2868,8 +2958,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
@@ -2907,6 +2998,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2929,8 +3021,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
@@ -2969,6 +3062,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -2992,8 +3086,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
@@ -3028,6 +3123,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3051,8 +3147,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <4 x i32>, <4 x i32> addrspace(1)* undef
@@ -3091,6 +3188,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3114,8 +3212,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
@@ -3154,6 +3253,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3178,8 +3278,9 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
@@ -3215,6 +3316,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3242,8 +3344,9 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
@@ -3287,6 +3390,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3314,8 +3418,9 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
@@ -3351,6 +3456,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3386,8 +3492,9 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
@@ -3425,6 +3532,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3479,8 +3587,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3521,6 +3630,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3578,8 +3688,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3622,6 +3733,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3687,8 +3799,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3733,6 +3846,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3793,8 +3907,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3837,6 +3952,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3859,8 +3975,9 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
@@ -3954,6 +4071,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -3977,8 +4095,9 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = alloca { i8, i32 }, align 4, addrspace(5)
@@ -3995,50 +4114,53 @@ declare void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byv
define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 {
; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p5) = COPY $vgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5)
+ ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C3]](s32)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5)
+ ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY10]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
- ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY18]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY19]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999)
@@ -4052,43 +4174,46 @@ declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i6
define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 {
; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5)
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5)
+ ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
; CHECK-NEXT: SI_RETURN
call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align)
@@ -4124,6 +4249,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -4149,8 +4275,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)* addrspace(4)* undef
@@ -4188,6 +4315,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -4216,8 +4344,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <3 x i8> addrspace(1)*, <3 x i8> addrspace(1)* addrspace(4)* undef
@@ -4255,6 +4384,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -4286,8 +4416,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
@@ -4325,6 +4456,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -4368,8 +4500,9 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <8 x i8> addrspace(1)*, <8 x i8> addrspace(1)* addrspace(4)* undef
@@ -4407,6 +4540,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -4474,8 +4608,9 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF1]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
@@ -4515,6 +4650,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -4576,8 +4712,9 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
entry:
@@ -4588,16 +4725,17 @@ entry:
define void @stack_12xv3i32() #0 {
; CHECK-LABEL: name: stack_12xv3i32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
@@ -4628,14 +4766,15 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
@@ -4695,17 +4834,18 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -4728,16 +4868,17 @@ entry:
define void @stack_12xv3f32() #0 {
; CHECK-LABEL: name: stack_12xv3f32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
@@ -4768,14 +4909,15 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
@@ -4835,17 +4977,18 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -4868,16 +5011,17 @@ entry:
define void @stack_8xv5i32() #0 {
; CHECK-LABEL: name: stack_8xv5i32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
@@ -4904,14 +5048,15 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
@@ -4979,17 +5124,18 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -5008,16 +5154,17 @@ entry:
define void @stack_8xv5f32() #0 {
; CHECK-LABEL: name: stack_8xv5f32
; CHECK: bb.1.entry:
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
@@ -5044,14 +5191,15 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
@@ -5119,17 +5267,18 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: SI_RETURN
entry:
@@ -5160,8 +5309,9 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() {
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[DEF]](p4)
; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY]](p4)
; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY1]](p4)
@@ -5169,8 +5319,9 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() {
; CHECK-NEXT: $sgpr12 = COPY [[DEF2]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY2]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY3]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY4]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY5]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index a957ade9b86f3..3e8197462e313 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -28,6 +28,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
@@ -46,8 +47,9 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[DEF]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void %fptr()
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 1be7d377b5a55..20208f1336625 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -809,47 +809,48 @@ declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byv
define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_fastcc_multi_byval
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.35, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34
@@ -933,32 +934,34 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32)
; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval
- ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5)
- ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY9]](s32)
+ ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%alloca0 = alloca [3 x i32], align 16, addrspace(5)
%alloca1 = alloca [2 x i64], align 8, addrspace(5)
@@ -974,47 +977,48 @@ declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)*
define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 {
; GCN-LABEL: name: sibling_call_byval_and_stack_passed
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN-NEXT: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN-NEXT: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN-NEXT: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN-NEXT: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN-NEXT: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.36, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35
@@ -1094,21 +1098,22 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed
- ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN-NEXT: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
+ ; GCN-NEXT: G_STORE [[COPY9]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr1 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[C1]](s32)
@@ -1140,17 +1145,18 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: $vgpr28 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr29 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr30 = COPY [[C1]](s32)
- ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY49:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY49]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY47]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY48]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%alloca = alloca [3 x i32], align 16, addrspace(5)
store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca
@@ -1163,42 +1169,45 @@ declare hidden fastcc i64 @i64_fastcc_i64(i64 %arg0)
define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 {
; GCN-LABEL: name: sibling_call_i64_fastcc_i64
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a)
ret i64 %ret
@@ -1209,42 +1218,45 @@ declare hidden fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %arg0)
define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 {
; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a)
ret i8 addrspace(1)* %ret
@@ -1255,40 +1267,43 @@ declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0)
define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 {
; GCN-LABEL: name: sibling_call_i16_fastcc_i16
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a)
ret i16 %ret
@@ -1299,40 +1314,43 @@ declare hidden fastcc half @f16_fastcc_f16(half %arg0)
define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 {
; GCN-LABEL: name: sibling_call_f16_fastcc_f16
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc half @f16_fastcc_f16(half %a)
ret half %ret
@@ -1343,47 +1361,50 @@ declare hidden fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %arg0)
define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 {
; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>)
; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16)
; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a)
ret <3 x i16> %ret
@@ -1394,42 +1415,45 @@ declare hidden fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %arg0)
define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 {
; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a)
ret <4 x i16> %ret
@@ -1440,48 +1464,51 @@ declare hidden fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %arg0)
define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 {
; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64
; GCN: bb.1.entry:
- ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; GCN-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; GCN-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; GCN-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY9]](s32), [[COPY10]](s32)
+ ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY11]](s32), [[COPY12]](s32)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64
- ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
; GCN-NEXT: $vgpr0 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN-NEXT: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN-NEXT: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN-NEXT: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
+ ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4)
+ ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4)
+ ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4)
+ ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64)
+ ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32)
+ ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32)
+ ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32)
+ ; GCN-NEXT: $sgpr15 = COPY [[COPY20]](s32)
+ ; GCN-NEXT: $vgpr31 = COPY [[COPY21]](s32)
+ ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
entry:
%ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a)
ret <2 x i64> %ret
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
index 123845420c06a..b9cdcff95ed1e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll
@@ -6,36 +6,39 @@ declare hidden void @external_void_func_void()
define void @tail_call_void_func_void() {
; CHECK-LABEL: name: tail_call_void_func_void
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr15
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>)
- ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32)
- ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
+ ; CHECK-NEXT: $sgpr15 = COPY [[COPY16]](s32)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
+ ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
tail call void @external_void_func_void()
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
index dcde1b181b0cf..a0d6d75f0e567 100644
--- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -389,4 +389,4 @@ declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()
-attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
+attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index c66770059a252..97b47c9791c2a 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -230,6 +230,6 @@ attributes #1 = { nounwind }
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 43d46ba1dfe3d..b05054d8a03d5 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -937,24 +937,24 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 1d975c0b16c59..61ba99bc16f7d 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -647,15 +647,15 @@ attributes #1 = { nounwind }
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 227a8ffc74af9..33ad439b0d977 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -418,13 +418,13 @@ attributes #1 = { nounwind }
; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
;.
; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index 595fe7bca1064..c32354dd7f125 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -344,4 +344,4 @@ define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind noinline }
-attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
index ad0e5aaf54d6d..d198c0a6fa7c2 100644
--- a/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll
@@ -123,7 +123,7 @@ define amdgpu_kernel void @known_xyz_0(i32 addrspace(1)* %out) !reqd_work_group_
}
; CHECK: .amdhsa_system_vgpr_workitem_id 0
-attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
+attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
!0 = !{i32 1, i32 64, i32 64}
!1 = !{i32 64, i32 1, i32 64}
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index 20f550cb632fb..253e183645aaf 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index a25261604a259..414d0652d52a6 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -42,6 +42,6 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
index ffc965d69835c..94fe3f0e9fc11 100644
--- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
@@ -19,7 +19,7 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_or_saveexec_b64 s[16:17], -1
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[16:17]
-; CHECK-NEXT: v_writelane_b32 v40, s33, 15
+; CHECK-NEXT: v_writelane_b32 v40, s33, 16
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
@@ -32,10 +32,11 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
+; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_addk_i32 s32, 0x400
-; CHECK-NEXT: v_writelane_b32 v40, s44, 12
-; CHECK-NEXT: v_writelane_b32 v40, s46, 13
+; CHECK-NEXT: v_writelane_b32 v40, s45, 13
+; CHECK-NEXT: v_writelane_b32 v40, s46, 14
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef
; CHECK-NEXT: .Ltmp0:
@@ -43,14 +44,15 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12
-; CHECK-NEXT: v_writelane_b32 v40, s47, 14
+; CHECK-NEXT: v_writelane_b32 v40, s47, 15
; CHECK-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v41, v31
-; CHECK-NEXT: s_mov_b32 s42, s14
-; CHECK-NEXT: s_mov_b32 s43, s13
-; CHECK-NEXT: s_mov_b32 s44, s12
+; CHECK-NEXT: s_mov_b32 s42, s15
+; CHECK-NEXT: s_mov_b32 s43, s14
+; CHECK-NEXT: s_mov_b32 s44, s13
+; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -60,9 +62,10 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
-; CHECK-NEXT: s_mov_b32 s12, s44
-; CHECK-NEXT: s_mov_b32 s13, s43
-; CHECK-NEXT: s_mov_b32 s14, s42
+; CHECK-NEXT: s_mov_b32 s12, s45
+; CHECK-NEXT: s_mov_b32 s13, s44
+; CHECK-NEXT: s_mov_b32 s14, s43
+; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47]
; CHECK-NEXT: .Ltmp1:
@@ -71,8 +74,9 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: flat_store_dword v[0:1], v2
-; CHECK-NEXT: v_readlane_b32 s47, v40, 14
-; CHECK-NEXT: v_readlane_b32 s46, v40, 13
+; CHECK-NEXT: v_readlane_b32 s47, v40, 15
+; CHECK-NEXT: v_readlane_b32 s46, v40, 14
+; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
@@ -87,7 +91,7 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
-; CHECK-NEXT: v_readlane_b32 s33, v40, 15
+; CHECK-NEXT: v_readlane_b32 s33, v40, 16
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index f343fab1d6371..7f9f2c002f206 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -396,7 +396,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v40, s33, 17
+; GCN-NEXT: v_writelane_b32 v40, s33, 18
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
@@ -412,13 +412,15 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s46, 13
-; GCN-NEXT: v_writelane_b32 v40, s47, 14
-; GCN-NEXT: v_writelane_b32 v40, s48, 15
-; GCN-NEXT: v_writelane_b32 v40, s49, 16
-; GCN-NEXT: s_mov_b32 s42, s14
-; GCN-NEXT: s_mov_b32 s43, s13
-; GCN-NEXT: s_mov_b32 s44, s12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
+; GCN-NEXT: s_mov_b32 s42, s15
+; GCN-NEXT: s_mov_b32 s43, s14
+; GCN-NEXT: s_mov_b32 s44, s13
+; GCN-NEXT: s_mov_b32 s45, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -433,9 +435,10 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s44
-; GCN-NEXT: s_mov_b32 s13, s43
-; GCN-NEXT: s_mov_b32 s14, s42
+; GCN-NEXT: s_mov_b32 s12, s45
+; GCN-NEXT: s_mov_b32 s13, s44
+; GCN-NEXT: s_mov_b32 s14, s43
+; GCN-NEXT: s_mov_b32 s15, s42
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
@@ -443,10 +446,11 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s49, v40, 16
-; GCN-NEXT: v_readlane_b32 s48, v40, 15
-; GCN-NEXT: v_readlane_b32 s47, v40, 14
-; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s49, v40, 17
+; GCN-NEXT: v_readlane_b32 s48, v40, 16
+; GCN-NEXT: v_readlane_b32 s47, v40, 15
+; GCN-NEXT: v_readlane_b32 s46, v40, 14
+; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
@@ -461,7 +465,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
-; GCN-NEXT: v_readlane_b32 s33, v40, 17
+; GCN-NEXT: v_readlane_b32 s33, v40, 18
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
@@ -474,7 +478,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[16:17]
-; GISEL-NEXT: v_writelane_b32 v40, s33, 17
+; GISEL-NEXT: v_writelane_b32 v40, s33, 18
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
@@ -490,13 +494,15 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s46, 13
-; GISEL-NEXT: v_writelane_b32 v40, s47, 14
-; GISEL-NEXT: v_writelane_b32 v40, s48, 15
-; GISEL-NEXT: v_writelane_b32 v40, s49, 16
-; GISEL-NEXT: s_mov_b32 s42, s14
-; GISEL-NEXT: s_mov_b32 s43, s13
-; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: v_writelane_b32 v40, s45, 13
+; GISEL-NEXT: v_writelane_b32 v40, s46, 14
+; GISEL-NEXT: v_writelane_b32 v40, s47, 15
+; GISEL-NEXT: v_writelane_b32 v40, s48, 16
+; GISEL-NEXT: v_writelane_b32 v40, s49, 17
+; GISEL-NEXT: s_mov_b32 s42, s15
+; GISEL-NEXT: s_mov_b32 s43, s14
+; GISEL-NEXT: s_mov_b32 s44, s13
+; GISEL-NEXT: s_mov_b32 s45, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -511,9 +517,10 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s44
-; GISEL-NEXT: s_mov_b32 s13, s43
-; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_mov_b32 s12, s45
+; GISEL-NEXT: s_mov_b32 s13, s44
+; GISEL-NEXT: s_mov_b32 s14, s43
+; GISEL-NEXT: s_mov_b32 s15, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
@@ -521,10 +528,11 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: s_cbranch_execnz .LBB2_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s49, v40, 16
-; GISEL-NEXT: v_readlane_b32 s48, v40, 15
-; GISEL-NEXT: v_readlane_b32 s47, v40, 14
-; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s49, v40, 17
+; GISEL-NEXT: v_readlane_b32 s48, v40, 16
+; GISEL-NEXT: v_readlane_b32 s47, v40, 15
+; GISEL-NEXT: v_readlane_b32 s46, v40, 14
+; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
@@ -539,7 +547,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
-; GISEL-NEXT: v_readlane_b32 s33, v40, 17
+; GISEL-NEXT: v_readlane_b32 s33, v40, 18
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
@@ -556,7 +564,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v40, s33, 17
+; GCN-NEXT: v_writelane_b32 v40, s33, 18
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
@@ -572,13 +580,15 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s46, 13
-; GCN-NEXT: v_writelane_b32 v40, s47, 14
-; GCN-NEXT: v_writelane_b32 v40, s48, 15
-; GCN-NEXT: v_writelane_b32 v40, s49, 16
-; GCN-NEXT: s_mov_b32 s42, s14
-; GCN-NEXT: s_mov_b32 s43, s13
-; GCN-NEXT: s_mov_b32 s44, s12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
+; GCN-NEXT: s_mov_b32 s42, s15
+; GCN-NEXT: s_mov_b32 s43, s14
+; GCN-NEXT: s_mov_b32 s44, s13
+; GCN-NEXT: s_mov_b32 s45, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -594,9 +604,10 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s44
-; GCN-NEXT: s_mov_b32 s13, s43
-; GCN-NEXT: s_mov_b32 s14, s42
+; GCN-NEXT: s_mov_b32 s12, s45
+; GCN-NEXT: s_mov_b32 s13, s44
+; GCN-NEXT: s_mov_b32 s14, s43
+; GCN-NEXT: s_mov_b32 s15, s42
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
@@ -606,10 +617,11 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s49, v40, 16
-; GCN-NEXT: v_readlane_b32 s48, v40, 15
-; GCN-NEXT: v_readlane_b32 s47, v40, 14
-; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s49, v40, 17
+; GCN-NEXT: v_readlane_b32 s48, v40, 16
+; GCN-NEXT: v_readlane_b32 s47, v40, 15
+; GCN-NEXT: v_readlane_b32 s46, v40, 14
+; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
@@ -624,7 +636,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
-; GCN-NEXT: v_readlane_b32 s33, v40, 17
+; GCN-NEXT: v_readlane_b32 s33, v40, 18
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
@@ -637,7 +649,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[16:17]
-; GISEL-NEXT: v_writelane_b32 v40, s33, 17
+; GISEL-NEXT: v_writelane_b32 v40, s33, 18
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
@@ -653,13 +665,15 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s46, 13
-; GISEL-NEXT: v_writelane_b32 v40, s47, 14
-; GISEL-NEXT: v_writelane_b32 v40, s48, 15
-; GISEL-NEXT: v_writelane_b32 v40, s49, 16
-; GISEL-NEXT: s_mov_b32 s42, s14
-; GISEL-NEXT: s_mov_b32 s43, s13
-; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: v_writelane_b32 v40, s45, 13
+; GISEL-NEXT: v_writelane_b32 v40, s46, 14
+; GISEL-NEXT: v_writelane_b32 v40, s47, 15
+; GISEL-NEXT: v_writelane_b32 v40, s48, 16
+; GISEL-NEXT: v_writelane_b32 v40, s49, 17
+; GISEL-NEXT: s_mov_b32 s42, s15
+; GISEL-NEXT: s_mov_b32 s43, s14
+; GISEL-NEXT: s_mov_b32 s44, s13
+; GISEL-NEXT: s_mov_b32 s45, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -675,9 +689,10 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s44
-; GISEL-NEXT: s_mov_b32 s13, s43
-; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_mov_b32 s12, s45
+; GISEL-NEXT: s_mov_b32 s13, s44
+; GISEL-NEXT: s_mov_b32 s14, s43
+; GISEL-NEXT: s_mov_b32 s15, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
@@ -685,10 +700,11 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: s_cbranch_execnz .LBB3_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s49, v40, 16
-; GISEL-NEXT: v_readlane_b32 s48, v40, 15
-; GISEL-NEXT: v_readlane_b32 s47, v40, 14
-; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s49, v40, 17
+; GISEL-NEXT: v_readlane_b32 s48, v40, 16
+; GISEL-NEXT: v_readlane_b32 s47, v40, 15
+; GISEL-NEXT: v_readlane_b32 s46, v40, 14
+; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
@@ -703,7 +719,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
-; GISEL-NEXT: v_readlane_b32 s33, v40, 17
+; GISEL-NEXT: v_readlane_b32 s33, v40, 18
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
@@ -720,7 +736,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v40, s33, 17
+; GCN-NEXT: v_writelane_b32 v40, s33, 18
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
@@ -736,13 +752,15 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s46, 13
-; GCN-NEXT: v_writelane_b32 v40, s47, 14
-; GCN-NEXT: v_writelane_b32 v40, s48, 15
-; GCN-NEXT: v_writelane_b32 v40, s49, 16
-; GCN-NEXT: s_mov_b32 s42, s14
-; GCN-NEXT: s_mov_b32 s43, s13
-; GCN-NEXT: s_mov_b32 s44, s12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
+; GCN-NEXT: s_mov_b32 s42, s15
+; GCN-NEXT: s_mov_b32 s43, s14
+; GCN-NEXT: s_mov_b32 s44, s13
+; GCN-NEXT: s_mov_b32 s45, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -757,9 +775,10 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s44
-; GCN-NEXT: s_mov_b32 s13, s43
-; GCN-NEXT: s_mov_b32 s14, s42
+; GCN-NEXT: s_mov_b32 s12, s45
+; GCN-NEXT: s_mov_b32 s13, s44
+; GCN-NEXT: s_mov_b32 s14, s43
+; GCN-NEXT: s_mov_b32 s15, s42
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
@@ -769,10 +788,11 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
-; GCN-NEXT: v_readlane_b32 s49, v40, 16
-; GCN-NEXT: v_readlane_b32 s48, v40, 15
-; GCN-NEXT: v_readlane_b32 s47, v40, 14
-; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s49, v40, 17
+; GCN-NEXT: v_readlane_b32 s48, v40, 16
+; GCN-NEXT: v_readlane_b32 s47, v40, 15
+; GCN-NEXT: v_readlane_b32 s46, v40, 14
+; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
@@ -787,7 +807,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
-; GCN-NEXT: v_readlane_b32 s33, v40, 17
+; GCN-NEXT: v_readlane_b32 s33, v40, 18
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
@@ -800,7 +820,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[16:17]
-; GISEL-NEXT: v_writelane_b32 v40, s33, 17
+; GISEL-NEXT: v_writelane_b32 v40, s33, 18
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
@@ -816,13 +836,15 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s46, 13
-; GISEL-NEXT: v_writelane_b32 v40, s47, 14
-; GISEL-NEXT: v_writelane_b32 v40, s48, 15
-; GISEL-NEXT: v_writelane_b32 v40, s49, 16
-; GISEL-NEXT: s_mov_b32 s42, s14
-; GISEL-NEXT: s_mov_b32 s43, s13
-; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: v_writelane_b32 v40, s45, 13
+; GISEL-NEXT: v_writelane_b32 v40, s46, 14
+; GISEL-NEXT: v_writelane_b32 v40, s47, 15
+; GISEL-NEXT: v_writelane_b32 v40, s48, 16
+; GISEL-NEXT: v_writelane_b32 v40, s49, 17
+; GISEL-NEXT: s_mov_b32 s42, s15
+; GISEL-NEXT: s_mov_b32 s43, s14
+; GISEL-NEXT: s_mov_b32 s44, s13
+; GISEL-NEXT: s_mov_b32 s45, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -837,9 +859,10 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s44
-; GISEL-NEXT: s_mov_b32 s13, s43
-; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_mov_b32 s12, s45
+; GISEL-NEXT: s_mov_b32 s13, s44
+; GISEL-NEXT: s_mov_b32 s14, s43
+; GISEL-NEXT: s_mov_b32 s15, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: v_mov_b32_e32 v1, v0
; GISEL-NEXT: ; implicit-def: $vgpr0
@@ -849,10 +872,11 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1
-; GISEL-NEXT: v_readlane_b32 s49, v40, 16
-; GISEL-NEXT: v_readlane_b32 s48, v40, 15
-; GISEL-NEXT: v_readlane_b32 s47, v40, 14
-; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s49, v40, 17
+; GISEL-NEXT: v_readlane_b32 s48, v40, 16
+; GISEL-NEXT: v_readlane_b32 s47, v40, 15
+; GISEL-NEXT: v_readlane_b32 s46, v40, 14
+; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
@@ -867,7 +891,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
-; GISEL-NEXT: v_readlane_b32 s33, v40, 17
+; GISEL-NEXT: v_readlane_b32 s33, v40, 18
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
@@ -885,7 +909,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v40, s33, 19
+; GCN-NEXT: v_writelane_b32 v40, s33, 20
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
@@ -901,15 +925,17 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
-; GCN-NEXT: v_writelane_b32 v40, s46, 13
-; GCN-NEXT: v_writelane_b32 v40, s47, 14
-; GCN-NEXT: v_writelane_b32 v40, s48, 15
-; GCN-NEXT: v_writelane_b32 v40, s49, 16
-; GCN-NEXT: v_writelane_b32 v40, s50, 17
-; GCN-NEXT: v_writelane_b32 v40, s51, 18
-; GCN-NEXT: s_mov_b32 s42, s14
-; GCN-NEXT: s_mov_b32 s43, s13
-; GCN-NEXT: s_mov_b32 s44, s12
+; GCN-NEXT: v_writelane_b32 v40, s45, 13
+; GCN-NEXT: v_writelane_b32 v40, s46, 14
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
+; GCN-NEXT: v_writelane_b32 v40, s50, 18
+; GCN-NEXT: v_writelane_b32 v40, s51, 19
+; GCN-NEXT: s_mov_b32 s42, s15
+; GCN-NEXT: s_mov_b32 s43, s14
+; GCN-NEXT: s_mov_b32 s44, s13
+; GCN-NEXT: s_mov_b32 s45, s12
; GCN-NEXT: s_mov_b64 s[34:35], s[10:11]
; GCN-NEXT: s_mov_b64 s[36:37], s[8:9]
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -929,9 +955,10 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: s_mov_b64 s[6:7], s[38:39]
; GCN-NEXT: s_mov_b64 s[8:9], s[36:37]
; GCN-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT: s_mov_b32 s12, s44
-; GCN-NEXT: s_mov_b32 s13, s43
-; GCN-NEXT: s_mov_b32 s14, s42
+; GCN-NEXT: s_mov_b32 s12, s45
+; GCN-NEXT: s_mov_b32 s13, s44
+; GCN-NEXT: s_mov_b32 s14, s43
+; GCN-NEXT: s_mov_b32 s15, s42
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
@@ -941,12 +968,13 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: s_mov_b64 exec, s[48:49]
; GCN-NEXT: .LBB5_4: ; %bb2
; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
-; GCN-NEXT: v_readlane_b32 s51, v40, 18
-; GCN-NEXT: v_readlane_b32 s50, v40, 17
-; GCN-NEXT: v_readlane_b32 s49, v40, 16
-; GCN-NEXT: v_readlane_b32 s48, v40, 15
-; GCN-NEXT: v_readlane_b32 s47, v40, 14
-; GCN-NEXT: v_readlane_b32 s46, v40, 13
+; GCN-NEXT: v_readlane_b32 s51, v40, 19
+; GCN-NEXT: v_readlane_b32 s50, v40, 18
+; GCN-NEXT: v_readlane_b32 s49, v40, 17
+; GCN-NEXT: v_readlane_b32 s48, v40, 16
+; GCN-NEXT: v_readlane_b32 s47, v40, 15
+; GCN-NEXT: v_readlane_b32 s46, v40, 14
+; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
@@ -961,7 +989,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: s_addk_i32 s32, 0xfc00
-; GCN-NEXT: v_readlane_b32 s33, v40, 19
+; GCN-NEXT: v_readlane_b32 s33, v40, 20
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
@@ -974,7 +1002,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[16:17]
-; GISEL-NEXT: v_writelane_b32 v40, s33, 19
+; GISEL-NEXT: v_writelane_b32 v40, s33, 20
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
@@ -990,15 +1018,17 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
-; GISEL-NEXT: v_writelane_b32 v40, s46, 13
-; GISEL-NEXT: v_writelane_b32 v40, s47, 14
-; GISEL-NEXT: v_writelane_b32 v40, s48, 15
-; GISEL-NEXT: v_writelane_b32 v40, s49, 16
-; GISEL-NEXT: v_writelane_b32 v40, s50, 17
-; GISEL-NEXT: v_writelane_b32 v40, s51, 18
-; GISEL-NEXT: s_mov_b32 s42, s14
-; GISEL-NEXT: s_mov_b32 s43, s13
-; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: v_writelane_b32 v40, s45, 13
+; GISEL-NEXT: v_writelane_b32 v40, s46, 14
+; GISEL-NEXT: v_writelane_b32 v40, s47, 15
+; GISEL-NEXT: v_writelane_b32 v40, s48, 16
+; GISEL-NEXT: v_writelane_b32 v40, s49, 17
+; GISEL-NEXT: v_writelane_b32 v40, s50, 18
+; GISEL-NEXT: v_writelane_b32 v40, s51, 19
+; GISEL-NEXT: s_mov_b32 s42, s15
+; GISEL-NEXT: s_mov_b32 s43, s14
+; GISEL-NEXT: s_mov_b32 s44, s13
+; GISEL-NEXT: s_mov_b32 s45, s12
; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
@@ -1018,9 +1048,10 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT: s_mov_b32 s12, s44
-; GISEL-NEXT: s_mov_b32 s13, s43
-; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_mov_b32 s12, s45
+; GISEL-NEXT: s_mov_b32 s13, s44
+; GISEL-NEXT: s_mov_b32 s14, s43
+; GISEL-NEXT: s_mov_b32 s15, s42
; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: ; implicit-def: $vgpr31
@@ -1030,12 +1061,13 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: s_mov_b64 exec, s[48:49]
; GISEL-NEXT: .LBB5_4: ; %bb2
; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
-; GISEL-NEXT: v_readlane_b32 s51, v40, 18
-; GISEL-NEXT: v_readlane_b32 s50, v40, 17
-; GISEL-NEXT: v_readlane_b32 s49, v40, 16
-; GISEL-NEXT: v_readlane_b32 s48, v40, 15
-; GISEL-NEXT: v_readlane_b32 s47, v40, 14
-; GISEL-NEXT: v_readlane_b32 s46, v40, 13
+; GISEL-NEXT: v_readlane_b32 s51, v40, 19
+; GISEL-NEXT: v_readlane_b32 s50, v40, 18
+; GISEL-NEXT: v_readlane_b32 s49, v40, 17
+; GISEL-NEXT: v_readlane_b32 s48, v40, 16
+; GISEL-NEXT: v_readlane_b32 s47, v40, 15
+; GISEL-NEXT: v_readlane_b32 s46, v40, 14
+; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
@@ -1050,7 +1082,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
-; GISEL-NEXT: v_readlane_b32 s33, v40, 19
+; GISEL-NEXT: v_readlane_b32 s33, v40, 20
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll
new file mode 100644
index 0000000000000..4e8cc7ba8d4f5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.lds.kernel.id()
+declare i32 @llvm.amdgcn.workgroup.id.x()
+
+define void @function_lds_id(i32 addrspace(1)* %out) {
+; GCN-LABEL: function_lds_id:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s4, s15, s12
+; GCN-NEXT: v_mov_b32_e32 v2, s4
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %tmp0 = call i32 @llvm.amdgcn.lds.kernel.id()
+ %help = call i32 @llvm.amdgcn.workgroup.id.x()
+ %both = add i32 %tmp0, %help
+ store i32 %both, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @kernel_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !0 {
+; GCN-LABEL: kernel_lds_id:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-NEXT: s_add_i32 s2, s6, 42
+; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
+ %tmp0 = call i32 @llvm.amdgcn.lds.kernel.id()
+ %help = call i32 @llvm.amdgcn.workgroup.id.x()
+ %both = add i32 %tmp0, %help
+ store i32 %both, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @indirect_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !1 {
+; GCN-LABEL: indirect_lds_id:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_mov_b32 flat_scratch_lo, s7
+; GCN-NEXT: s_add_i32 s6, s6, s9
+; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
+; GCN-NEXT: s_add_u32 s0, s0, s9
+; GCN-NEXT: s_addc_u32 s1, s1, 0
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: s_getpc_b64 s[6:7]
+; GCN-NEXT: s_add_u32 s6, s6, function_lds_id@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s7, s7, function_lds_id@gotpcrel32@hi+12
+; GCN-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GCN-NEXT: s_mov_b32 s15, 21
+; GCN-NEXT: s_mov_b32 s12, s8
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GCN-NEXT: s_endpgm
+ call void @function_lds_id(i32 addrspace(1) * %out)
+ ret void
+}
+
+define amdgpu_kernel void @doesnt_use_it(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !0 {
+; GCN-LABEL: doesnt_use_it:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v2, 0x64
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_mov_b32_e32 v1, s1
+; GCN-NEXT: flat_store_dword v[0:1], v2
+; GCN-NEXT: s_endpgm
+ store i32 100, i32 addrspace(1)* %out
+ ret void
+}
+
+
+!0 = !{i32 42}
+!1 = !{i32 21}
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index b662256829b55..ac9540bc0d842 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -191,40 +191,45 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s33, 5
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_addk_i32 s32, 0x800
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
+; GFX9-NEXT: v_writelane_b32 v40, s36, 3
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
-; GFX9-NEXT: v_writelane_b32 v40, s35, 3
-; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
+; GFX9-NEXT: v_writelane_b32 v40, s37, 4
+; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_mov_b32_e32 v41, v1
; GFX9-NEXT: v_mov_b32_e32 v42, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41
+; GFX9-NEXT: s_mov_b32 s34, s15
; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: v_mad_u32_u24 v41, v42, v41, v43
+; GFX9-NEXT: s_mov_b32 s15, s34
; GFX9-NEXT: v_mov_b32_e32 v0, v41
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: v_add_u32_e32 v0, v41, v43
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: s_mov_b32 s15, s34
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX9-NEXT: v_readlane_b32 s35, v40, 3
+; GFX9-NEXT: v_readlane_b32 s37, v40, 4
+; GFX9-NEXT: v_readlane_b32 s36, v40, 3
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: s_addk_i32 s32, 0xf800
-; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: v_readlane_b32 s33, v40, 5
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
index 0c9104db87138..d2ae6cf60681a 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index 809c9568a9b95..fcaba91cea8fa 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -73,6 +73,6 @@ define amdgpu_kernel void @test_simple_indirect_call() {
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
index 21d24b9c0b48e..0b5109b7270bd 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
@@ -4,13 +4,13 @@
; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec
-; GCN: v_writelane_b32 v40, s33, 2
+; GCN: v_writelane_b32 v40, s33, 3
; GCN: s_swappc_b64
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9
; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; GCN: v_readlane_b32 s33, v40, 2
+; GCN: v_readlane_b32 s33, v40, 3
; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN: s_mov_b64 exec
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index 2de7e4b23ba04..ff1b276af74d6 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index 4b74bc4c10516..eeb2c3df025f1 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -97,6 +97,6 @@ define amdgpu_kernel void @kernel2() #0 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index be85a776f9136..bee04e3c76f98 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
attributes #2 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index a38f899e2f566..53b3bde39ba24 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 7bfbd80cda430..9367b4fd47bbb 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index ab0d4bb28a284..423627e582d2f 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
attributes #0 = { "uniform-work-group-size"="false" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index 006f7cbef1497..1d00da488dfb1 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -si-annotate-control-flow -mtriple=amdgcn-amdhsa -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI-OPT %s
@@ -20,10 +19,10 @@ define hidden void @widget() {
; GCN-NEXT: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
-; GCN-NEXT: v_readfirstlane_b32 s15, v0
+; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: s_cbranch_vccz .LBB0_3
; GCN-NEXT: ; %bb.1: ; %bb4
-; GCN-NEXT: s_cmp_lg_u32 s15, 9
+; GCN-NEXT: s_cmp_lg_u32 s16, 9
; GCN-NEXT: s_cbranch_scc1 .LBB0_4
; GCN-NEXT: ; %bb.2: ; %bb7
; GCN-NEXT: s_getpc_b64 s[16:17]
@@ -32,7 +31,7 @@ define hidden void @widget() {
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: s_branch .LBB0_7
; GCN-NEXT: .LBB0_3: ; %bb2
-; GCN-NEXT: s_cmp_eq_u32 s15, 21
+; GCN-NEXT: s_cmp_eq_u32 s16, 21
; GCN-NEXT: s_cbranch_scc1 .LBB0_6
; GCN-NEXT: .LBB0_4: ; %bb9
; GCN-NEXT: s_getpc_b64 s[16:17]
@@ -187,7 +186,7 @@ define hidden void @blam() {
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
-; GCN-NEXT: v_writelane_b32 v40, s33, 17
+; GCN-NEXT: v_writelane_b32 v40, s33, 18
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
@@ -210,12 +209,14 @@ define hidden void @blam() {
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s45, 13
; GCN-NEXT: v_writelane_b32 v40, s46, 14
-; GCN-NEXT: v_writelane_b32 v40, s48, 15
-; GCN-NEXT: v_writelane_b32 v40, s49, 16
+; GCN-NEXT: v_writelane_b32 v40, s47, 15
+; GCN-NEXT: v_writelane_b32 v40, s48, 16
+; GCN-NEXT: v_writelane_b32 v40, s49, 17
; GCN-NEXT: v_mov_b32_e32 v41, v31
-; GCN-NEXT: s_mov_b32 s44, s14
-; GCN-NEXT: s_mov_b32 s45, s13
-; GCN-NEXT: s_mov_b32 s46, s12
+; GCN-NEXT: s_mov_b32 s44, s15
+; GCN-NEXT: s_mov_b32 s45, s14
+; GCN-NEXT: s_mov_b32 s46, s13
+; GCN-NEXT: s_mov_b32 s47, s12
; GCN-NEXT: s_mov_b64 s[36:37], s[10:11]
; GCN-NEXT: s_mov_b64 s[38:39], s[8:9]
; GCN-NEXT: s_mov_b64 s[40:41], s[6:7]
@@ -278,9 +279,10 @@ define hidden void @blam() {
; GCN-NEXT: s_mov_b64 s[6:7], s[40:41]
; GCN-NEXT: s_mov_b64 s[8:9], s[38:39]
; GCN-NEXT: s_mov_b64 s[10:11], s[36:37]
-; GCN-NEXT: s_mov_b32 s12, s46
-; GCN-NEXT: s_mov_b32 s13, s45
-; GCN-NEXT: s_mov_b32 s14, s44
+; GCN-NEXT: s_mov_b32 s12, s47
+; GCN-NEXT: s_mov_b32 s13, s46
+; GCN-NEXT: s_mov_b32 s14, s45
+; GCN-NEXT: s_mov_b32 s15, s44
; GCN-NEXT: v_mov_b32_e32 v31, v41
; GCN-NEXT: s_swappc_b64 s[30:31], s[48:49]
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 082711ac2d6f5..172744e060cbf 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -30,6 +30,7 @@
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
@@ -67,6 +68,7 @@
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
@@ -127,6 +129,7 @@ body: |
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -153,6 +156,7 @@ body: |
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -194,6 +198,7 @@ body: |
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -220,6 +225,7 @@ body: |
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -262,6 +268,7 @@ body: |
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -289,6 +296,7 @@ body: |
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -341,6 +349,7 @@ body: |
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index c3dde88dcd746..867d73f18f0ca 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -119,6 +119,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+; CHECK-NEXT: LDSKernelId: { reg: '$sgpr15' }
; CHECK-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
@@ -164,6 +165,7 @@ define void @function() {
; CHECK-NEXT: workGroupIDX: { reg: '$sgpr12' }
; CHECK-NEXT: workGroupIDY: { reg: '$sgpr13' }
; CHECK-NEXT: workGroupIDZ: { reg: '$sgpr14' }
+; CHECK-NEXT: LDSKernelId: { reg: '$sgpr15' }
; CHECK-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
; CHECK-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
; CHECK-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
diff --git a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
index 7870d416f8df8..73e75fc0f7ef5 100644
--- a/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
+++ b/llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir
@@ -38,6 +38,7 @@
# RESULT-NEXT: workGroupIDX: { reg: '$sgpr20' }
# RESULT-NEXT: workGroupIDY: { reg: '$sgpr19' }
# RESULT-NEXT: workGroupIDZ: { reg: '$sgpr18' }
+# RESULT-NEXT: LDSKernelId: { reg: '$sgpr15' }
# RESULT-NEXT: implicitArgPtr: { reg: '$sgpr10_sgpr11' }
# RESULT-NEXT: workItemIDX: { reg: '$vgpr34', mask: 1023 }
# RESULT-NEXT: workItemIDY: { reg: '$vgpr34', mask: 1047552 }