[llvm] 0197cd0 - AMDGPU: Optimize amdgpu-no-* attributes
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 9 15:24:42 PDT 2021
Author: Matt Arsenault
Date: 2021-09-09T18:24:28-04:00
New Revision: 0197cd0bd4a4ea3d05ae55f05a6e70202cd6a19b
URL: https://github.com/llvm/llvm-project/commit/0197cd0bd4a4ea3d05ae55f05a6e70202cd6a19b
DIFF: https://github.com/llvm/llvm-project/commit/0197cd0bd4a4ea3d05ae55f05a6e70202cd6a19b.diff
LOG: AMDGPU: Optimize amdgpu-no-* attributes
Use the amdgpu-no-* call site attributes to skip passing implicit inputs
the callee promises not to use. This allows clobbering a few extra
registers in the fixed ABI and avoids some workitem ID packing
instructions.
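As a minimal IR sketch (hypothetical function names) of the call-site form
these hints take, mirroring the added tests:

declare hidden void @extern()

define amdgpu_kernel void @caller() {
  ; The callee promises not to read any workitem ID, so the caller skips
  ; loading v0/v1/v2 and packing them into vgpr31 for this call.
  call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
  ret void
}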
Added:
llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b9faad453aba7..961501a113c3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -753,6 +753,11 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
+ // If there's no call site, this doesn't correspond to a call from the IR and
+ // doesn't need implicit inputs.
+ if (!Info.CB)
+ return true;
+
const AMDGPUFunctionArgInfo *CalleeArgInfo
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -773,17 +778,32 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
};
+ static constexpr StringLiteral ImplicitAttrNames[] = {
+ "amdgpu-no-dispatch-ptr",
+ "amdgpu-no-queue-ptr",
+ "amdgpu-no-implicitarg-ptr",
+ "amdgpu-no-dispatch-id",
+ "amdgpu-no-workgroup-id-x",
+ "amdgpu-no-workgroup-id-y",
+ "amdgpu-no-workgroup-id-z"
+ };
+
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const AMDGPULegalizerInfo *LI
= static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
+ unsigned I = 0;
for (auto InputID : InputRegs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
+ // If the callee does not use the attribute value, skip copying the value.
+ if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
+ continue;
+
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
@@ -843,16 +863,22 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
const LLT S32 = LLT::scalar(32);
+ const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+ const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+ const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
// If incoming ids are not packed we need to pack them.
// FIXME: Should consider known workgroup size to eliminate known 0 cases.
Register InputReg;
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+ NeedWorkItemIDX) {
InputReg = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
}
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+ NeedWorkItemIDY) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
std::get<2>(WorkitemIDY));
@@ -861,7 +887,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+ NeedWorkItemIDZ) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
std::get<2>(WorkitemIDZ));
@@ -870,7 +897,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
}
- if (!InputReg) {
+ if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
InputReg = MRI.createGenericVirtualRegister(S32);
// Workitem ids are already packed, any of present incoming arguments will
@@ -883,7 +910,9 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
}
if (OutgoingArg->isRegister()) {
- ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (InputReg)
+ ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+
if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
report_fatal_error("failed to allocate implicit input argument");
} else {
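The packing change above is easiest to see with a partial set of hints: a
sketch, reusing the hypothetical @extern callee, where only the X workitem
ID is still needed, so the shifts by 10 and 20 and the ORs for Y and Z are
never emitted:

define amdgpu_kernel void @caller_only_x() {
  ; Y and Z are declared unused; only the X id flows into the packed
  ; workitem register, with no G_SHL/G_OR packing code for this call.
  call void @extern() "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
  ret void
}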
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 44aefe87e709b..dd50779f26143 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2899,10 +2899,16 @@ bool AMDGPULegalizerInfo::loadInputValue(
std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
if (!Arg) {
- assert(ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
- // The intrinsic may appear when we have a 0 sized kernarg segment, in which
- // case the pointer argument may be missing and we use null.
- B.buildConstant(DstReg, 0);
+ if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
+ // The intrinsic may appear when we have a 0 sized kernarg segment, in which
+ // case the pointer argument may be missing and we use null.
+ B.buildConstant(DstReg, 0);
+ return true;
+ }
+
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ B.buildUndef(DstReg);
return true;
}
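A sketch of the undefined-behavior case this branch now tolerates (function
name is hypothetical): the attribute asserts the input is unused, yet the
intrinsic reads it, so the read lowers to undef instead of tripping the old
assert:

define i32 @marked_but_reads_id() "amdgpu-no-workgroup-id-x" {
  ; UB by the attribute's contract; now folds to an implicit def.
  %id = call i32 @llvm.amdgcn.workgroup.id.x()
  ret i32 %id
}

declare i32 @llvm.amdgcn.workgroup.id.x()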
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1fa1dc0349e95..7e5a33d816198 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1820,11 +1820,16 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
if (!Reg) {
- // It's possible for a kernarg intrinsic call to appear in a kernel with no
- // allocated segment, in which case we do not add the user sgpr argument, so
- // just return null.
- assert(PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR);
- return DAG.getConstant(0, SDLoc(), VT);
+ if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
+ // It's possible for a kernarg intrinsic call to appear in a kernel with
+ // no allocated segment, in which case we do not add the user sgpr
+ // argument, so just return null.
+ return DAG.getConstant(0, SDLoc(), VT);
+ }
+
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ return DAG.getUNDEF(VT);
}
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
@@ -2042,31 +2047,33 @@ void SITargetLowering::allocateSpecialInputSGPRs(
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
- // TODO: Unify handling with private memory pointers.
+ // We need to allocate these in place regardless of their use.
+ const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
- if (Info.hasDispatchPtr())
+ // TODO: Unify handling with private memory pointers.
+ if (IsFixed || Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (Info.hasQueuePtr())
+ if (IsFixed || Info.hasQueuePtr())
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
- if (Info.hasImplicitArgPtr())
+ if (IsFixed || Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (Info.hasDispatchID())
+ if (IsFixed || Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
- if (Info.hasWorkGroupIDX())
+ if (IsFixed || Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
- if (Info.hasWorkGroupIDY())
+ if (IsFixed || Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
- if (Info.hasWorkGroupIDZ())
+ if (IsFixed || Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
}
@@ -2766,21 +2773,28 @@ void SITargetLowering::passSpecialInputs(
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
- AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
- AMDGPUFunctionArgInfo::DISPATCH_PTR,
- AMDGPUFunctionArgInfo::QUEUE_PTR,
- AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
- AMDGPUFunctionArgInfo::DISPATCH_ID,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+ static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
+ StringLiteral> ImplicitAttrs[] = {
+ {AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+ {AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
+ {AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+ {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}
};
- for (auto InputID : InputRegs) {
+ for (auto Attr : ImplicitAttrs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
+ AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
+
+ // If the callee does not use the attribute value, skip copying the value.
+ if (CLI.CB->hasFnAttr(Attr.second))
+ continue;
+
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
@@ -2846,11 +2860,17 @@ void SITargetLowering::passSpecialInputs(
SDValue InputReg;
SDLoc SL;
+ const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+ const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+ const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
// If incoming ids are not packed we need to pack them.
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+ NeedWorkItemIDX)
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+ NeedWorkItemIDY) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2858,7 +2878,8 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+ NeedWorkItemIDZ) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2866,7 +2887,7 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
}
- if (!InputReg.getNode()) {
+ if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
// Workitem ids are already packed, any of present incoming arguments
// will carry all required fields.
ArgDescriptor IncomingArg = ArgDescriptor::createArg(
@@ -2877,13 +2898,17 @@ void SITargetLowering::passSpecialInputs(
}
if (OutgoingArg->isRegister()) {
- RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (InputReg)
+ RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+
CCInfo.AllocateReg(OutgoingArg->getRegister());
} else {
unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
- SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
- SpecialArgOffset);
- MemOpChains.push_back(ArgStore);
+ if (InputReg) {
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
+ MemOpChains.push_back(ArgStore);
+ }
}
}
@@ -5292,9 +5317,18 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr(
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
- SDValue QueuePtr = CreateLiveInRegister(
- DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+
+ SDValue QueuePtr;
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the trap,
+ // so just use a null pointer.
+ QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+ } else {
+ QueuePtr = CreateLiveInRegister(
+ DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+ }
+
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
@@ -5371,7 +5405,11 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined.
+ return DAG.getUNDEF(MVT::i32);
+ }
SDValue QueuePtr = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
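One rough way to reach this path (assuming a subtarget that reads the
private-segment aperture from the queue pointer rather than dedicated
aperture registers): an addrspacecast in a function wrongly carrying
amdgpu-no-queue-ptr now yields undef, and an llvm.trap in the same
situation falls back to a null queue pointer rather than deleting the trap:

define void @cast_without_queue_ptr(i32 addrspace(5)* %p) "amdgpu-no-queue-ptr" {
  ; Lowering the cast needs the private-segment aperture, normally read
  ; from the queue pointer; with the hint set there is no queue pointer
  ; SGPR, so getSegmentAperture returns undef.
  %flat = addrspacecast i32 addrspace(5)* %p to i32*
  store volatile i32 0, i32* %flat
  ret void
}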
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index c3c5b161001d9..b5e53eff2dbf1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,47 +9,34 @@ declare hidden void @extern()
define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-LABEL: name: kernel_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
- ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK: $sgpr12 = COPY [[COPY9]](s32)
+ ; CHECK: $sgpr13 = COPY [[COPY10]](s32)
+ ; CHECK: $sgpr14 = COPY [[COPY11]](s32)
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: S_ENDPGM 0
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
@@ -59,47 +46,38 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-LABEL: name: kernel_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+ ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
; CHECK: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: S_ENDPGM 0
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -109,47 +87,29 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-LABEL: name: kernel_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
- ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
+ ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]]
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+ ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32)
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+ ; CHECK: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY14]](s32)
; CHECK: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK: S_ENDPGM 0
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -159,40 +119,37 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
define void @func_call_no_workitem_ids() {
; CHECK-LABEL: name: func_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>)
+ ; CHECK: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; CHECK: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; CHECK: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK: S_SETPC_B64_return [[COPY18]]
+ ; CHECK: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
+ ; CHECK: S_SETPC_B64_return [[COPY16]]
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
ret void
}
@@ -200,40 +157,31 @@ define void @func_call_no_workitem_ids() {
define void @func_call_no_workgroup_ids() {
; CHECK-LABEL: name: func_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]]
+ ; CHECK: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]]
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]]
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>)
+ ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK: $sgpr8_sgpr9 = COPY [[COPY8]](p4)
+ ; CHECK: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; CHECK: $vgpr31 = COPY [[COPY10]](s32)
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK: S_SETPC_B64_return [[COPY18]]
+ ; CHECK: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
+ ; CHECK: S_SETPC_B64_return [[COPY12]]
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
@@ -241,40 +189,22 @@ define void @func_call_no_workgroup_ids() {
define void @func_call_no_other_sgprs() {
; CHECK-LABEL: name: func_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY14]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY15]](s32)
- ; CHECK: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]]
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+ ; CHECK: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
+ ; CHECK: $vgpr31 = COPY [[COPY4]](s32)
+ ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
- ; CHECK: S_SETPC_B64_return [[COPY18]]
+ ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; CHECK: S_SETPC_B64_return [[COPY6]]
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 2d1e8e4c371c6..c5a1eb8b6a10b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -43,41 +43,17 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
ret i32 %ret
@@ -86,46 +62,22 @@ entry:
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -137,46 +89,22 @@ entry:
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -188,41 +116,17 @@ entry:
define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
ret void
@@ -232,17 +136,8 @@ entry:
define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: kernel_call_i32_fastcc_i32_i32_unused_result
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+ ; GCN: liveins: $sgpr8_sgpr9
+ ; GCN: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
@@ -253,38 +148,12 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
; GCN: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64)
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
- ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
- ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
- ; GCN: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
- ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
- ; GCN: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; GCN: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
- ; GCN: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
- ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GCN: $vgpr0 = COPY [[EVEC]](s32)
; GCN: $vgpr1 = COPY [[EVEC1]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY11]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY13]](s64)
- ; GCN: $sgpr12 = COPY [[COPY14]](s32)
- ; GCN: $sgpr13 = COPY [[COPY15]](s32)
- ; GCN: $sgpr14 = COPY [[COPY16]](s32)
- ; GCN: $vgpr31 = COPY [[OR1]](s32)
- ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: S_ENDPGM 0
entry:
@@ -314,52 +183,28 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)*
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN: [[COPY9:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C]](s32)
+ ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
- ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
+ ; GCN: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
- ; GCN: $vgpr0 = COPY [[COPY22]](s32)
- ; GCN: [[COPY23:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY11]]
- ; GCN: S_SETPC_B64_return [[COPY23]], implicit $vgpr0
+ ; GCN: $vgpr0 = COPY [[COPY6]](s32)
+ ; GCN: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+ ; GCN: S_SETPC_B64_return [[COPY7]], implicit $vgpr0
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval)
ret i32 %ret
@@ -371,77 +216,53 @@ entry:
define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
- ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
- ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
- ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
- ; GCN: $sgpr12 = COPY [[COPY44]](s32)
- ; GCN: $sgpr13 = COPY [[COPY45]](s32)
- ; GCN: $sgpr14 = COPY [[COPY46]](s32)
- ; GCN: $vgpr31 = COPY [[COPY47]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*))
ret i32 %ret
@@ -506,110 +327,86 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: $vgpr2 = COPY [[COPY10]](s32)
- ; GCN: $vgpr3 = COPY [[COPY11]](s32)
- ; GCN: $vgpr4 = COPY [[COPY12]](s32)
- ; GCN: $vgpr5 = COPY [[COPY13]](s32)
- ; GCN: $vgpr6 = COPY [[COPY14]](s32)
- ; GCN: $vgpr7 = COPY [[COPY15]](s32)
- ; GCN: $vgpr8 = COPY [[COPY16]](s32)
- ; GCN: $vgpr9 = COPY [[COPY17]](s32)
- ; GCN: $vgpr10 = COPY [[COPY18]](s32)
- ; GCN: $vgpr11 = COPY [[COPY19]](s32)
- ; GCN: $vgpr12 = COPY [[COPY20]](s32)
- ; GCN: $vgpr13 = COPY [[COPY21]](s32)
- ; GCN: $vgpr14 = COPY [[COPY22]](s32)
- ; GCN: $vgpr15 = COPY [[COPY23]](s32)
- ; GCN: $vgpr16 = COPY [[COPY24]](s32)
- ; GCN: $vgpr17 = COPY [[COPY25]](s32)
- ; GCN: $vgpr18 = COPY [[COPY26]](s32)
- ; GCN: $vgpr19 = COPY [[COPY27]](s32)
- ; GCN: $vgpr20 = COPY [[COPY28]](s32)
- ; GCN: $vgpr21 = COPY [[COPY29]](s32)
- ; GCN: $vgpr22 = COPY [[COPY30]](s32)
- ; GCN: $vgpr23 = COPY [[COPY31]](s32)
- ; GCN: $vgpr24 = COPY [[COPY32]](s32)
- ; GCN: $vgpr25 = COPY [[COPY33]](s32)
- ; GCN: $vgpr26 = COPY [[COPY34]](s32)
- ; GCN: $vgpr27 = COPY [[COPY35]](s32)
- ; GCN: $vgpr28 = COPY [[COPY36]](s32)
- ; GCN: $vgpr29 = COPY [[COPY37]](s32)
- ; GCN: $vgpr30 = COPY [[COPY38]](s32)
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: $vgpr2 = COPY [[COPY2]](s32)
+ ; GCN: $vgpr3 = COPY [[COPY3]](s32)
+ ; GCN: $vgpr4 = COPY [[COPY4]](s32)
+ ; GCN: $vgpr5 = COPY [[COPY5]](s32)
+ ; GCN: $vgpr6 = COPY [[COPY6]](s32)
+ ; GCN: $vgpr7 = COPY [[COPY7]](s32)
+ ; GCN: $vgpr8 = COPY [[COPY8]](s32)
+ ; GCN: $vgpr9 = COPY [[COPY9]](s32)
+ ; GCN: $vgpr10 = COPY [[COPY10]](s32)
+ ; GCN: $vgpr11 = COPY [[COPY11]](s32)
+ ; GCN: $vgpr12 = COPY [[COPY12]](s32)
+ ; GCN: $vgpr13 = COPY [[COPY13]](s32)
+ ; GCN: $vgpr14 = COPY [[COPY14]](s32)
+ ; GCN: $vgpr15 = COPY [[COPY15]](s32)
+ ; GCN: $vgpr16 = COPY [[COPY16]](s32)
+ ; GCN: $vgpr17 = COPY [[COPY17]](s32)
+ ; GCN: $vgpr18 = COPY [[COPY18]](s32)
+ ; GCN: $vgpr19 = COPY [[COPY19]](s32)
+ ; GCN: $vgpr20 = COPY [[COPY20]](s32)
+ ; GCN: $vgpr21 = COPY [[COPY21]](s32)
+ ; GCN: $vgpr22 = COPY [[COPY22]](s32)
+ ; GCN: $vgpr23 = COPY [[COPY23]](s32)
+ ; GCN: $vgpr24 = COPY [[COPY24]](s32)
+ ; GCN: $vgpr25 = COPY [[COPY25]](s32)
+ ; GCN: $vgpr26 = COPY [[COPY26]](s32)
+ ; GCN: $vgpr27 = COPY [[COPY27]](s32)
+ ; GCN: $vgpr28 = COPY [[COPY28]](s32)
+ ; GCN: $vgpr29 = COPY [[COPY29]](s32)
+ ; GCN: $vgpr30 = COPY [[COPY30]](s32)
; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
- ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
- ; GCN: $sgpr12 = COPY [[COPY44]](s32)
- ; GCN: $sgpr13 = COPY [[COPY45]](s32)
- ; GCN: $sgpr14 = COPY [[COPY46]](s32)
- ; GCN: $vgpr31 = COPY [[COPY47]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c)
ret i32 %ret
@@ -618,115 +415,91 @@ entry:
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: $vgpr2 = COPY [[COPY10]](s32)
- ; GCN: $vgpr3 = COPY [[COPY11]](s32)
- ; GCN: $vgpr4 = COPY [[COPY12]](s32)
- ; GCN: $vgpr5 = COPY [[COPY13]](s32)
- ; GCN: $vgpr6 = COPY [[COPY14]](s32)
- ; GCN: $vgpr7 = COPY [[COPY15]](s32)
- ; GCN: $vgpr8 = COPY [[COPY16]](s32)
- ; GCN: $vgpr9 = COPY [[COPY17]](s32)
- ; GCN: $vgpr10 = COPY [[COPY18]](s32)
- ; GCN: $vgpr11 = COPY [[COPY19]](s32)
- ; GCN: $vgpr12 = COPY [[COPY20]](s32)
- ; GCN: $vgpr13 = COPY [[COPY21]](s32)
- ; GCN: $vgpr14 = COPY [[COPY22]](s32)
- ; GCN: $vgpr15 = COPY [[COPY23]](s32)
- ; GCN: $vgpr16 = COPY [[COPY24]](s32)
- ; GCN: $vgpr17 = COPY [[COPY25]](s32)
- ; GCN: $vgpr18 = COPY [[COPY26]](s32)
- ; GCN: $vgpr19 = COPY [[COPY27]](s32)
- ; GCN: $vgpr20 = COPY [[COPY28]](s32)
- ; GCN: $vgpr21 = COPY [[COPY29]](s32)
- ; GCN: $vgpr22 = COPY [[COPY30]](s32)
- ; GCN: $vgpr23 = COPY [[COPY31]](s32)
- ; GCN: $vgpr24 = COPY [[COPY32]](s32)
- ; GCN: $vgpr25 = COPY [[COPY33]](s32)
- ; GCN: $vgpr26 = COPY [[COPY34]](s32)
- ; GCN: $vgpr27 = COPY [[COPY35]](s32)
- ; GCN: $vgpr28 = COPY [[COPY36]](s32)
- ; GCN: $vgpr29 = COPY [[COPY37]](s32)
- ; GCN: $vgpr30 = COPY [[COPY38]](s32)
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: $vgpr2 = COPY [[COPY2]](s32)
+ ; GCN: $vgpr3 = COPY [[COPY3]](s32)
+ ; GCN: $vgpr4 = COPY [[COPY4]](s32)
+ ; GCN: $vgpr5 = COPY [[COPY5]](s32)
+ ; GCN: $vgpr6 = COPY [[COPY6]](s32)
+ ; GCN: $vgpr7 = COPY [[COPY7]](s32)
+ ; GCN: $vgpr8 = COPY [[COPY8]](s32)
+ ; GCN: $vgpr9 = COPY [[COPY9]](s32)
+ ; GCN: $vgpr10 = COPY [[COPY10]](s32)
+ ; GCN: $vgpr11 = COPY [[COPY11]](s32)
+ ; GCN: $vgpr12 = COPY [[COPY12]](s32)
+ ; GCN: $vgpr13 = COPY [[COPY13]](s32)
+ ; GCN: $vgpr14 = COPY [[COPY14]](s32)
+ ; GCN: $vgpr15 = COPY [[COPY15]](s32)
+ ; GCN: $vgpr16 = COPY [[COPY16]](s32)
+ ; GCN: $vgpr17 = COPY [[COPY17]](s32)
+ ; GCN: $vgpr18 = COPY [[COPY18]](s32)
+ ; GCN: $vgpr19 = COPY [[COPY19]](s32)
+ ; GCN: $vgpr20 = COPY [[COPY20]](s32)
+ ; GCN: $vgpr21 = COPY [[COPY21]](s32)
+ ; GCN: $vgpr22 = COPY [[COPY22]](s32)
+ ; GCN: $vgpr23 = COPY [[COPY23]](s32)
+ ; GCN: $vgpr24 = COPY [[COPY24]](s32)
+ ; GCN: $vgpr25 = COPY [[COPY25]](s32)
+ ; GCN: $vgpr26 = COPY [[COPY26]](s32)
+ ; GCN: $vgpr27 = COPY [[COPY27]](s32)
+ ; GCN: $vgpr28 = COPY [[COPY28]](s32)
+ ; GCN: $vgpr29 = COPY [[COPY29]](s32)
+ ; GCN: $vgpr30 = COPY [[COPY30]](s32)
; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
- ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
- ; GCN: $sgpr12 = COPY [[COPY44]](s32)
- ; GCN: $sgpr13 = COPY [[COPY45]](s32)
- ; GCN: $sgpr14 = COPY [[COPY46]](s32)
- ; GCN: $vgpr31 = COPY [[COPY47]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -741,31 +514,15 @@ entry:
define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-LABEL: name: no_sibling_call_callee_more_stack_space
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
; GCN: $vgpr2 = COPY [[C]](s32)
; GCN: $vgpr3 = COPY [[C]](s32)
; GCN: $vgpr4 = COPY [[C]](s32)
@@ -795,32 +552,24 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN: $vgpr28 = COPY [[C]](s32)
; GCN: $vgpr29 = COPY [[C]](s32)
; GCN: $vgpr30 = COPY [[C]](s32)
- ; GCN: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
+ ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C2]](s32)
+ ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
+ ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY13]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY14]](s64)
- ; GCN: $sgpr12 = COPY [[COPY15]](s32)
- ; GCN: $sgpr13 = COPY [[COPY16]](s32)
- ; GCN: $sgpr14 = COPY [[COPY17]](s32)
- ; GCN: $vgpr31 = COPY [[COPY18]](s32)
- ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
- ; GCN: $vgpr0 = COPY [[COPY21]](s32)
- ; GCN: [[COPY22:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
- ; GCN: S_SETPC_B64_return [[COPY22]], implicit $vgpr0
+ ; GCN: $vgpr0 = COPY [[COPY5]](s32)
+ ; GCN: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; GCN: S_SETPC_B64_return [[COPY6]], implicit $vgpr0
entry:
%ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
ret i32 %ret
@@ -830,67 +579,27 @@ entry:
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
- ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64)
- ; GCN: $sgpr12 = COPY [[COPY16]](s32)
- ; GCN: $sgpr13 = COPY [[COPY17]](s32)
- ; GCN: $sgpr14 = COPY [[COPY18]](s32)
- ; GCN: $vgpr31 = COPY [[COPY19]](s32)
- ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+ ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
- ; GCN: [[COPY22:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY23:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY24:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY25:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: $vgpr2 = COPY [[COPY21]](s32)
- ; GCN: [[COPY30:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY30]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY22]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY23]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY24]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY25]](s64)
- ; GCN: $sgpr12 = COPY [[COPY26]](s32)
- ; GCN: $sgpr13 = COPY [[COPY27]](s32)
- ; GCN: $sgpr14 = COPY [[COPY28]](s32)
- ; GCN: $vgpr31 = COPY [[COPY29]](s32)
- ; GCN: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: $vgpr2 = COPY [[COPY5]](s32)
+ ; GCN: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
%ret = tail call fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %other.call)
@@ -902,115 +611,91 @@ entry:
define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
- ; GCN: $vgpr2 = COPY [[COPY10]](s32)
- ; GCN: $vgpr3 = COPY [[COPY11]](s32)
- ; GCN: $vgpr4 = COPY [[COPY12]](s32)
- ; GCN: $vgpr5 = COPY [[COPY13]](s32)
- ; GCN: $vgpr6 = COPY [[COPY14]](s32)
- ; GCN: $vgpr7 = COPY [[COPY15]](s32)
- ; GCN: $vgpr8 = COPY [[COPY16]](s32)
- ; GCN: $vgpr9 = COPY [[COPY17]](s32)
- ; GCN: $vgpr10 = COPY [[COPY18]](s32)
- ; GCN: $vgpr11 = COPY [[COPY19]](s32)
- ; GCN: $vgpr12 = COPY [[COPY20]](s32)
- ; GCN: $vgpr13 = COPY [[COPY21]](s32)
- ; GCN: $vgpr14 = COPY [[COPY22]](s32)
- ; GCN: $vgpr15 = COPY [[COPY23]](s32)
- ; GCN: $vgpr16 = COPY [[COPY24]](s32)
- ; GCN: $vgpr17 = COPY [[COPY25]](s32)
- ; GCN: $vgpr18 = COPY [[COPY26]](s32)
- ; GCN: $vgpr19 = COPY [[COPY27]](s32)
- ; GCN: $vgpr20 = COPY [[COPY28]](s32)
- ; GCN: $vgpr21 = COPY [[COPY29]](s32)
- ; GCN: $vgpr22 = COPY [[COPY30]](s32)
- ; GCN: $vgpr23 = COPY [[COPY31]](s32)
- ; GCN: $vgpr24 = COPY [[COPY32]](s32)
- ; GCN: $vgpr25 = COPY [[COPY33]](s32)
- ; GCN: $vgpr26 = COPY [[COPY34]](s32)
- ; GCN: $vgpr27 = COPY [[COPY35]](s32)
- ; GCN: $vgpr28 = COPY [[COPY36]](s32)
- ; GCN: $vgpr29 = COPY [[COPY37]](s32)
- ; GCN: $vgpr30 = COPY [[COPY38]](s32)
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
+ ; GCN: $vgpr2 = COPY [[COPY2]](s32)
+ ; GCN: $vgpr3 = COPY [[COPY3]](s32)
+ ; GCN: $vgpr4 = COPY [[COPY4]](s32)
+ ; GCN: $vgpr5 = COPY [[COPY5]](s32)
+ ; GCN: $vgpr6 = COPY [[COPY6]](s32)
+ ; GCN: $vgpr7 = COPY [[COPY7]](s32)
+ ; GCN: $vgpr8 = COPY [[COPY8]](s32)
+ ; GCN: $vgpr9 = COPY [[COPY9]](s32)
+ ; GCN: $vgpr10 = COPY [[COPY10]](s32)
+ ; GCN: $vgpr11 = COPY [[COPY11]](s32)
+ ; GCN: $vgpr12 = COPY [[COPY12]](s32)
+ ; GCN: $vgpr13 = COPY [[COPY13]](s32)
+ ; GCN: $vgpr14 = COPY [[COPY14]](s32)
+ ; GCN: $vgpr15 = COPY [[COPY15]](s32)
+ ; GCN: $vgpr16 = COPY [[COPY16]](s32)
+ ; GCN: $vgpr17 = COPY [[COPY17]](s32)
+ ; GCN: $vgpr18 = COPY [[COPY18]](s32)
+ ; GCN: $vgpr19 = COPY [[COPY19]](s32)
+ ; GCN: $vgpr20 = COPY [[COPY20]](s32)
+ ; GCN: $vgpr21 = COPY [[COPY21]](s32)
+ ; GCN: $vgpr22 = COPY [[COPY22]](s32)
+ ; GCN: $vgpr23 = COPY [[COPY23]](s32)
+ ; GCN: $vgpr24 = COPY [[COPY24]](s32)
+ ; GCN: $vgpr25 = COPY [[COPY25]](s32)
+ ; GCN: $vgpr26 = COPY [[COPY26]](s32)
+ ; GCN: $vgpr27 = COPY [[COPY27]](s32)
+ ; GCN: $vgpr28 = COPY [[COPY28]](s32)
+ ; GCN: $vgpr29 = COPY [[COPY29]](s32)
+ ; GCN: $vgpr30 = COPY [[COPY30]](s32)
; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
- ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
- ; GCN: $sgpr12 = COPY [[COPY44]](s32)
- ; GCN: $sgpr13 = COPY [[COPY45]](s32)
- ; GCN: $sgpr14 = COPY [[COPY46]](s32)
- ; GCN: $vgpr31 = COPY [[COPY47]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -1022,46 +707,38 @@ entry:
define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area
; GCN: bb.1.entry:
- ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
- ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
- ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
- ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
- ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
- ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
- ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5)
; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
@@ -1076,7 +753,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
- ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
@@ -1084,16 +761,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32)
; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
- ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
- ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
- ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
- ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN: $vgpr0 = COPY [[COPY8]](s32)
- ; GCN: $vgpr1 = COPY [[COPY9]](s32)
+ ; GCN: $vgpr0 = COPY [[COPY]](s32)
+ ; GCN: $vgpr1 = COPY [[COPY1]](s32)
; GCN: $vgpr2 = COPY [[C1]](s32)
; GCN: $vgpr3 = COPY [[C1]](s32)
; GCN: $vgpr4 = COPY [[C1]](s32)
@@ -1129,17 +798,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
- ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
- ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
- ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4)
- ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4)
- ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64)
- ; GCN: $sgpr12 = COPY [[COPY44]](s32)
- ; GCN: $sgpr13 = COPY [[COPY45]](s32)
- ; GCN: $sgpr14 = COPY [[COPY46]](s32)
- ; GCN: $vgpr31 = COPY [[COPY47]](s32)
- ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+ ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
entry:
%alloca = alloca [16 x i32], align 4, addrspace(5)
%gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
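For reference, the implicit inputs that this patch lets the sibling call drop map onto registers as follows (a summary inferred from the liveins and copies above, not a normative list):

  s[4:5]    dispatch ptr        ("amdgpu-no-dispatch-ptr")
  s[6:7]    queue ptr           ("amdgpu-no-queue-ptr")
  s[8:9]    implicitarg ptr     ("amdgpu-no-implicitarg-ptr")
  s[10:11]  dispatch id         ("amdgpu-no-dispatch-id")
  s12-s14   workgroup id x/y/z  ("amdgpu-no-workgroup-id-*")
  v31       packed workitem ids ("amdgpu-no-workitem-id-*")

When the call site carries all of these hints, none of the copies are emitted and the registers become free to clobber.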
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
new file mode 100644
index 0000000000000..72a6522ab14a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -0,0 +1,562 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=0 < %s | FileCheck -check-prefix=VARABI %s
+; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
+; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s
+
+; Test with gfx803 so that addrspacecast, llvm.amdgcn.is.shared, and
+; llvm.amdgcn.is.private require the queue ptr. Use code object v3 so
+; that llvm.trap and llvm.debugtrap also require the queue ptr.
+
+declare hidden void @requires_all_inputs()
+
+; This function is incorrectly marked with hints claiming that the
+; callee does not require the implicit arguments. Make sure we do not
+; crash.
+define void @parent_func_missing_inputs() #0 {
+; VARABI-LABEL: parent_func_missing_inputs:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: s_or_saveexec_b64 s[4:5], -1
+; VARABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; VARABI-NEXT: s_mov_b64 exec, s[4:5]
+; VARABI-NEXT: v_writelane_b32 v40, s33, 2
+; VARABI-NEXT: v_writelane_b32 v40, s30, 0
+; VARABI-NEXT: s_mov_b32 s33, s32
+; VARABI-NEXT: s_addk_i32 s32, 0x400
+; VARABI-NEXT: s_getpc_b64 s[4:5]
+; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
+; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
+; VARABI-NEXT: v_writelane_b32 v40, s31, 1
+; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; VARABI-NEXT: v_readlane_b32 s4, v40, 0
+; VARABI-NEXT: v_readlane_b32 s5, v40, 1
+; VARABI-NEXT: s_addk_i32 s32, 0xfc00
+; VARABI-NEXT: v_readlane_b32 s33, v40, 2
+; VARABI-NEXT: s_or_saveexec_b64 s[6:7], -1
+; VARABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; VARABI-NEXT: s_mov_b64 exec, s[6:7]
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[4:5]
+;
+; FIXEDABI-LABEL: parent_func_missing_inputs:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: s_or_saveexec_b64 s[16:17], -1
+; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; FIXEDABI-NEXT: s_mov_b64 exec, s[16:17]
+; FIXEDABI-NEXT: v_writelane_b32 v40, s33, 2
+; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0
+; FIXEDABI-NEXT: s_mov_b32 s33, s32
+; FIXEDABI-NEXT: s_addk_i32 s32, 0x400
+; FIXEDABI-NEXT: s_getpc_b64 s[16:17]
+; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
+; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
+; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1
+; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 0
+; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1
+; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00
+; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2
+; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1
+; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7]
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_setpc_b64 s[4:5]
+ call void @requires_all_inputs()
+ ret void
+}
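; Note the call-sequence scratch SGPRs above: VARABI uses s[4:5] while
; FIXEDABI moves to s[16:17], since the lower SGPRs now carry the
; fixed-ABI inputs (an observation from the generated code, not a stated
; rule).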
+
+define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
+; VARABI-LABEL: parent_kernel_missing_inputs:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_add_i32 s4, s4, s9
+; VARABI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
+; VARABI-NEXT: s_add_u32 s0, s0, s9
+; VARABI-NEXT: s_addc_u32 s1, s1, 0
+; VARABI-NEXT: s_mov_b32 flat_scratch_lo, s5
+; VARABI-NEXT: s_getpc_b64 s[4:5]
+; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
+; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
+; VARABI-NEXT: s_mov_b32 s32, 0
+; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; VARABI-NEXT: s_endpgm
+;
+; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
+; FIXEDABI-SDAG: ; %bb.0:
+; FIXEDABI-SDAG-NEXT: s_add_i32 s10, s10, s15
+; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
+; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s15
+; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2
+; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
+; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s11
+; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9]
+; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0
+; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[16:17]
+; FIXEDABI-SDAG-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
+; FIXEDABI-SDAG-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
+; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0
+; FIXEDABI-SDAG-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; FIXEDABI-SDAG-NEXT: s_endpgm
+;
+; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
+; FIXEDABI-GISEL: ; %bb.0:
+; FIXEDABI-GISEL-NEXT: s_add_i32 s10, s10, s15
+; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
+; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s15
+; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s11
+; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
+; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1
+; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0
+; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[16:17]
+; FIXEDABI-GISEL-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
+; FIXEDABI-GISEL-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
+; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0
+; FIXEDABI-GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; FIXEDABI-GISEL-NEXT: s_endpgm
+ call void @requires_all_inputs()
+ ret void
+}
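; A minimal IR sketch of the packing done into v31 above (value names are
; illustrative, not from this test): 10 bits per component,
; x | (y << 10) | (z << 20):
;   %y.shift = shl i32 %id.y, 10
;   %z.shift = shl i32 %id.z, 20
;   %xy      = or i32 %id.x, %y.shift
;   %packed  = or i32 %xy, %z.shift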
+
+; Function is marked with amdgpu-no-workitem-id-* but uses them anyway
+define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_func_use_workitem_id:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: v_and_b32_e32 v3, 0x3ff, v2
+; VARABI-NEXT: flat_store_dword v[0:1], v3
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_bfe_u32 v3, v2, 10, 10
+; VARABI-NEXT: v_bfe_u32 v2, v2, 20, 10
+; VARABI-NEXT: flat_store_dword v[0:1], v3
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
+; FIXEDABI-SDAG: ; %bb.0:
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v31
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_bfe_u32 v2, v31, 10, 10
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_bfe_u32 v2, v31, 20, 10
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id:
+; FIXEDABI-GISEL: ; %bb.0:
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: v_and_b32_e32 v2, 0x3ff, v31
+; FIXEDABI-GISEL-NEXT: v_bfe_u32 v3, v31, 10, 10
+; FIXEDABI-GISEL-NEXT: v_bfe_u32 v4, v31, 20, 10
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v3
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v4
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ store volatile i32 %id.x, i32 addrspace(1)* %ptr
+ store volatile i32 %id.y, i32 addrspace(1)* %ptr
+ store volatile i32 %id.z, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; Function is marked with amdgpu-no-workitem-id-* but uses them anyway
+define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_kernel_use_workitem_id:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VARABI-NEXT: s_waitcnt lgkmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v4, s1
+; VARABI-NEXT: v_mov_b32_e32 v3, s0
+; VARABI-NEXT: flat_store_dword v[3:4], v0
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: flat_store_dword v[3:4], v1
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: flat_store_dword v[3:4], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v4, s1
+; FIXEDABI-NEXT: v_mov_b32_e32 v3, s0
+; FIXEDABI-NEXT: flat_store_dword v[3:4], v0
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: flat_store_dword v[3:4], v1
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: flat_store_dword v[3:4], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_endpgm
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ store volatile i32 %id.x, i32 addrspace(1)* %ptr
+ store volatile i32 %id.y, i32 addrspace(1)* %ptr
+ store volatile i32 %id.z, i32 addrspace(1)* %ptr
+ ret void
+}
+
+define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_func_use_workgroup_id:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v2, s4
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v2, s5
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v2, s6
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: marked_func_use_workgroup_id:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s12
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s13
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s14
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
+ %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
+ %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
+ %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
+ store volatile i32 %id.x, i32 addrspace(1)* %ptr
+ store volatile i32 %id.y, i32 addrspace(1)* %ptr
+ store volatile i32 %id.z, i32 addrspace(1)* %ptr
+ ret void
+}
+
+define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_kernel_use_workgroup_id:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VARABI-NEXT: v_mov_b32_e32 v2, s6
+; VARABI-NEXT: s_waitcnt lgkmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v0, s0
+; VARABI-NEXT: v_mov_b32_e32 v1, s1
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v2, s7
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v2, s8
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s6
+; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v0, s0
+; FIXEDABI-NEXT: v_mov_b32_e32 v1, s1
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s7
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s8
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_endpgm
+ %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
+ %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
+ %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
+ store volatile i32 %id.x, i32 addrspace(1)* %ptr
+ store volatile i32 %id.y, i32 addrspace(1)* %ptr
+ store volatile i32 %id.z, i32 addrspace(1)* %ptr
+ ret void
+}
+
+define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_func_use_other_sgpr:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: marked_func_use_other_sgpr:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s6
+; FIXEDABI-NEXT: v_mov_b32_e32 v3, s7
+; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s8
+; FIXEDABI-NEXT: v_mov_b32_e32 v3, s9
+; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s4
+; FIXEDABI-NEXT: v_mov_b32_e32 v3, s5
+; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v2, s10
+; FIXEDABI-NEXT: v_mov_b32_e32 v3, s11
+; FIXEDABI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
+ %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
+ %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
+ %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
+ %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
+ store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
+ ret void
+}
+
+define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_kernel_use_other_sgpr:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_add_u32 s0, s4, 8
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_addc_u32 s1, s5, 0
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v0, s0
+; VARABI-NEXT: v_mov_b32_e32 v1, s1
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_add_u32 s0, s4, 8
+; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT: s_addc_u32 s1, s5, 0
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: v_mov_b32_e32 v0, s0
+; FIXEDABI-NEXT: v_mov_b32_e32 v1, s1
+; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT: s_endpgm
+ %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+ %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
+ %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
+ %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
+ %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
+ store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
+ ret void
+}
+
+define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
+; VARABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: v_mov_b32_e32 v0, 0
+; VARABI-NEXT: v_mov_b32_e32 v1, 0
+; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT: s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: v_mov_b32_e32 v0, 0
+; FIXEDABI-NEXT: v_mov_b32_e32 v1, 0
+; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT: s_endpgm
+ %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
+ ret void
+}
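; With no kernel arguments and amdgpu-no-implicitarg-ptr there is nothing
; for the intrinsic to point at, so both ABIs above materialize a null
; pointer (v[0:1] = 0) and the volatile load reads address 0; a sketch of
; the effective fold, under that reading:
;   %p = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() ; -> null
;   %v = load volatile i8, i8 addrspace(4)* %p                ; load from 0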
+
+; On gfx8, the queue ptr is required for this addrspacecast.
+define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 {
+; VARABI-LABEL: addrspacecast_requires_queue_ptr:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; VARABI-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
+; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
+; VARABI-NEXT: v_mov_b32_e32 v3, 0
+; VARABI-NEXT: v_mov_b32_e32 v4, 1
+; VARABI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; VARABI-NEXT: flat_store_dword v[2:3], v4
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: v_mov_b32_e32 v1, v3
+; VARABI-NEXT: v_mov_b32_e32 v2, 2
+; VARABI-NEXT: flat_store_dword v[0:1], v2
+; VARABI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
+; FIXEDABI-SDAG: ; %bb.0:
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[6:7], 0x40
+; FIXEDABI-SDAG-NEXT: s_load_dword s5, s[6:7], 0x44
+; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v2, s5
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
+; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
+; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v0, vcc
+; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 1
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[2:3], v0
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc
+; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 2
+; FIXEDABI-SDAG-NEXT: flat_store_dword v[4:5], v0
+; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
+; FIXEDABI-GISEL: ; %bb.0:
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[6:7], 0x44
+; FIXEDABI-GISEL-NEXT: s_load_dword s5, s[6:7], 0x40
+; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc
+; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v3, s4
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
+; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1
+; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, s5
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
+; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc
+; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, 1
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[2:3], v4
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v2, 2
+; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v2
+; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
+ %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
+ store volatile i32 1, i32* %flat.private
+ store volatile i32 2, i32* %flat.local
+ ret void
+}
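; A hedged reading of the FIXEDABI sequences above, with the aperture
; offsets taken from the s_load_dword addresses rather than any spec text:
; on gfx8 the aperture bases sit behind the queue ptr (s[6:7] here),
; shared at +0x40 and private at +0x44, and each cast lowers roughly to:
;   %base = load i32 from (queue.ptr + aperture.offset)
;   %flat = (ptr == -1) ? null : flat pointer with %base as the high half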
+
+define void @is_shared_requires_queue_ptr(i8* %ptr) #0 {
+; VARABI-LABEL: is_shared_requires_queue_ptr:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; VARABI-NEXT: flat_store_dword v[0:1], v0
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x40
+; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v0
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
+ %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
+ %zext = zext i1 %is.shared to i32
+ store volatile i32 %zext, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @is_private_requires_queue_ptr(i8* %ptr) #0 {
+; VARABI-LABEL: is_private_requires_queue_ptr:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; VARABI-NEXT: flat_store_dword v[0:1], v0
+; VARABI-NEXT: s_waitcnt vmcnt(0)
+; VARABI-NEXT: s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: is_private_requires_queue_ptr:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x44
+; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-NEXT: flat_store_dword v[0:1], v0
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT: s_setpc_b64 s[30:31]
+ %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
+ %zext = zext i1 %is.private to i32
+ store volatile i32 %zext, i32 addrspace(1)* undef
+ ret void
+}
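; Both intrinsics above reduce to a single aperture compare against the
; high dword of the flat pointer; an illustrative sketch:
;   %int = ptrtoint i8* %ptr to i64
;   %hi  = trunc i64 (lshr i64 %int, 32) to i32
;   %is  = icmp eq i32 %hi, %aperture.base   ; +0x40 shared, +0x44 private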
+
+define void @trap_requires_queue() #0 {
+; VARABI-LABEL: trap_requires_queue:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: s_mov_b64 s[0:1], 0
+; VARABI-NEXT: s_trap 2
+;
+; FIXEDABI-LABEL: trap_requires_queue:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: s_mov_b64 s[0:1], s[6:7]
+; FIXEDABI-NEXT: s_trap 2
+ call void @llvm.trap()
+ unreachable
+}
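; The trap lowering above differs because the trap handler reads the queue
; ptr from s[0:1] (a reading of the generated code, not a spec quote):
;   VARABI:   s_mov_b64 s[0:1], 0        ; the hint removed the queue ptr
;   FIXEDABI: s_mov_b64 s[0:1], s[6:7]   ; the fixed ABI always provides it
;   s_trap 2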
+
+define void @debugtrap_requires_queue() #0 {
+; VARABI-LABEL: debugtrap_requires_queue:
+; VARABI: ; %bb.0:
+; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT: s_trap 3
+;
+; FIXEDABI-LABEL: debugtrap_requires_queue:
+; FIXEDABI: ; %bb.0:
+; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT: s_trap 3
+ call void @llvm.debugtrap()
+ unreachable
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.workitem.id.y()
+declare i32 @llvm.amdgcn.workitem.id.z()
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
+declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+declare i64 @llvm.amdgcn.dispatch.id()
+declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+declare i1 @llvm.amdgcn.is.shared(i8*)
+declare i1 @llvm.amdgcn.is.private(i8*)
+declare void @llvm.trap()
+declare void @llvm.debugtrap()
+
+attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index e8021176d8fbb..968dbf20eb929 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -144,17 +144,21 @@ define hidden void @func_indirect_use_workgroup_id_x() #1 {
ret void
}
+; The argument is already in the right place. We are free to clobber the
+; other SGPR arguments.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
+; GCN-NOT: s12
+; GCN-NOT: s13
+; GCN-NOT: s14
define hidden void @func_indirect_use_workgroup_id_y() #1 {
call void @use_workgroup_id_y()
ret void
}
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
+; GCN-NOT: s12
+; GCN-NOT: s13
+; GCN-NOT: s14
define hidden void @func_indirect_use_workgroup_id_z() #1 {
call void @use_workgroup_id_z()
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 6a4ab5a30e180..a373442364055 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -131,10 +131,11 @@ define void @use_workitem_id_yz() #1 {
; VARABI: enable_vgpr_workitem_id = 0
; FIXEDABI: enable_vgpr_workitem_id = 2
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v31
+; FIXEDABI: v_mov_b32_e32 v31, v0{{$}}
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v31
; VARABI-NOT: v31
; GCN: s_swappc_b64
@@ -148,20 +149,18 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
; VARABI: enable_vgpr_workitem_id = 1
; FIXEDABI: enable_vgpr_workitem_id = 2
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
; VARABI-NOT: v31
; VARABI: v_lshlrev_b32_e32 v0, 10, v1
-
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-
; FIXEDABI-NOT: v0
; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+
; VARABI-NOT: v31
; GCN: s_swappc_b64
@@ -179,10 +178,11 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
; VARABI-NOT: v0
; VARABI-NOT: v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
@@ -198,10 +198,14 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
; VARABI-NOT: v0
; VARABI-NOT: v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
@@ -218,10 +222,14 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
; VARABI-NOT: v2
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v1, 20, v2
+; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
@@ -238,11 +246,15 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
; VARABI-NOT: v1
; VARABI-NOT: v2
-
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v0, 20, v2
+; FIXEDABI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-NEXT: v_or_b32_e32 v31, v1, v0
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
@@ -348,10 +360,9 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
; VARABI: v_mov_b32_e32 v1, v0
; VARABI: v_mov_b32_e32 v0, 0x22b
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI: v_mov_b32_e32 v31, v0
+; FIXEDABI: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
@@ -371,10 +382,12 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
; VARABI-NOT: v0
; FIXEDABI: enable_vgpr_workitem_id = 2
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
+; FIXEDABI: v_mov_b32_e32 v0, 0x22b
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
call void @other_arg_use_workitem_id_y(i32 555)
ret void
@@ -388,11 +401,11 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
; VARABI: s_swappc_b64
; VARABI-NOT: v0
-
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
+; FIXEDABI: v_mov_b32_e32 v0, 0x22b
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
call void @other_arg_use_workitem_id_z(i32 555)
ret void
@@ -462,13 +475,13 @@ define void @too_many_args_use_workitem_id_x(
; FIXEDABI: enable_vgpr_workitem_id = 2
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
; FIXEDABI-DAG: s_mov_b32 s32, 0
; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
+; FIXEDABI-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
+; FIXEDABI-DAG: v_mov_b32_e32 v31, v0
; FIXEDABI: s_swappc_b64
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
@@ -622,6 +635,10 @@ define void @too_many_args_use_workitem_id_x_byval(
; VARABI: s_swappc_b64
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_mov_b32_e32 v31, v0
; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7
; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], 0 offset:4{{$}}
; FIXEDABI: s_movk_i32 s32, 0x400{{$}}
@@ -632,11 +649,6 @@ define void @too_many_args_use_workitem_id_x_byval(
; FIXME: Why this reload?
; FIXEDABI: buffer_load_dword [[RELOAD:v[0-9]+]], off, s[0:3], 0 offset:4{{$}}
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-
; FIXEDABI-NOT: s32
; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32 offset:4
; FIXEDABI: s_swappc_b64
@@ -885,9 +897,53 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
ret void
}
+declare hidden void @extern_hint(i32) #2
+
+; Workitem IDs should not be passed due to the attribute
+; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints:
+; GCN-NOT: v30
+; GCN-NOT: v31
+; GCN: v_mov_b32_e32 v0, 9
+; GCN-NOT: v0
+; GCN-NOT: v31
+; GCN: s_swappc_b64
+define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
+ call void @extern_hint(i32 9)
+ ret void
+}
+
+; GCN-LABEL: {{^}}func_call_no_workitem_id_hints:
+; GCN-NOT: v30
+; GCN-NOT: v31
+; GCN: v_mov_b32_e32 v0, 9
+; GCN-NOT: v0
+; GCN-NOT: v31
+; GCN: s_swappc_b64
+define void @func_call_no_workitem_id_hints() #2 {
+ call void @extern_hint(i32 9)
+ ret void
+}
+
+declare hidden void @extern_nohint(i32)
+
+; Check that the hint is respected on the call site, not on the function
+; declaration.
+; GCN-LABEL: {{^}}kern_callsite_workitem_id_hints:
+; GCN-NOT: v30
+; GCN-NOT: v31
+; GCN: v_mov_b32_e32 v0, 9
+; GCN-NOT: v0
+; GCN-NOT: v31
+; GCN: s_swappc_b64
+define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
+ call void @extern_nohint(i32 9) #2
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0
attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
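The call-site form the last test relies on can also be written with inline
string attributes instead of an attribute group; a minimal sketch (the
function names here are illustrative):

  declare hidden void @extern_nohint(i32)

  define amdgpu_kernel void @caller() {
    ; Hints on the call instruction are honored even though the
    ; declaration carries none.
    call void @extern_nohint(i32 9) "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
    ret void
  }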