[llvm] 0197cd0 - AMDGPU: Optimize amdgpu-no-* attributes

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 9 15:24:42 PDT 2021


Author: Matt Arsenault
Date: 2021-09-09T18:24:28-04:00
New Revision: 0197cd0bd4a4ea3d05ae55f05a6e70202cd6a19b

URL: https://github.com/llvm/llvm-project/commit/0197cd0bd4a4ea3d05ae55f05a6e70202cd6a19b
DIFF: https://github.com/llvm/llvm-project/commit/0197cd0bd4a4ea3d05ae55f05a6e70202cd6a19b.diff

LOG: AMDGPU: Optimize amdgpu-no-* attributes

This allows clobbering a few extra registers in the fixed ABI, and
avoids some workitem ID packing instructions.
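For illustration, a minimal IR sketch distilled from the added tests (the
callee name is illustrative): marking a call site with the relevant
"amdgpu-no-*" attributes promises the callee never reads those implicit
inputs, so the caller can skip forwarding them.

    declare hidden void @extern()

    define amdgpu_kernel void @kernel() {
      ; With all three workitem-id attributes present, the caller no longer
      ; packs the X/Y/Z workitem IDs into $vgpr31 for the call, and the
      ; corresponding input registers become clobberable.
      call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
      ret void
    }

If such a function uses the corresponding intrinsic anyway, that is undefined
behavior; the lowering now folds the missing input to undef instead of
asserting (see abi-attribute-hints-undefined-behavior.ll).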

Added: 
    llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
    llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
    llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b9faad453aba7..961501a113c3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -753,6 +753,11 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
                                            CallLoweringInfo &Info) const {
   MachineFunction &MF = MIRBuilder.getMF();
 
+  // If there's no call site, this doesn't correspond to a call from the IR and
+  // doesn't need implicit inputs.
+  if (!Info.CB)
+    return true;
+
   const AMDGPUFunctionArgInfo *CalleeArgInfo
     = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
 
@@ -773,17 +778,32 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
     AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
   };
 
+  static constexpr StringLiteral ImplicitAttrNames[] = {
+    "amdgpu-no-dispatch-ptr",
+    "amdgpu-no-queue-ptr",
+    "amdgpu-no-implicitarg-ptr",
+    "amdgpu-no-dispatch-id",
+    "amdgpu-no-workgroup-id-x",
+    "amdgpu-no-workgroup-id-y",
+    "amdgpu-no-workgroup-id-z"
+  };
+
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const AMDGPULegalizerInfo *LI
     = static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
 
+  unsigned I = 0;
   for (auto InputID : InputRegs) {
     const ArgDescriptor *OutgoingArg;
     const TargetRegisterClass *ArgRC;
     LLT ArgTy;
 
+    // If the callee does not use the input, skip copying the value.
+    if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
+      continue;
+
     std::tie(OutgoingArg, ArgRC, ArgTy) =
         CalleeArgInfo->getPreloadedValue(InputID);
     if (!OutgoingArg)
@@ -843,16 +863,22 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
   const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
   const LLT S32 = LLT::scalar(32);
 
+  const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+  const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+  const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
   // If incoming ids are not packed we need to pack them.
   // FIXME: Should consider known workgroup size to eliminate known 0 cases.
   Register InputReg;
-  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
+  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+      NeedWorkItemIDX) {
     InputReg = MRI.createGenericVirtualRegister(S32);
     LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                        std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
   }
 
-  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+      NeedWorkItemIDY) {
     Register Y = MRI.createGenericVirtualRegister(S32);
     LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                        std::get<2>(WorkitemIDY));
@@ -861,7 +887,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
     InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
   }
 
-  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+      NeedWorkItemIDZ) {
     Register Z = MRI.createGenericVirtualRegister(S32);
     LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                        std::get<2>(WorkitemIDZ));
@@ -870,7 +897,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
     InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
   }
 
-  if (!InputReg) {
+  if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
     InputReg = MRI.createGenericVirtualRegister(S32);
 
     // Workitem ids are already packed, any of present incoming arguments will
@@ -883,7 +910,9 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
   }
 
   if (OutgoingArg->isRegister()) {
-    ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+    if (InputReg)
+      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+
     if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
       report_fatal_error("failed to allocate implicit input argument");
   } else {

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 44aefe87e709b..dd50779f26143 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2899,10 +2899,16 @@ bool AMDGPULegalizerInfo::loadInputValue(
   std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
 
   if (!Arg) {
-    assert(ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
-    // The intrinsic may appear when we have a 0 sized kernarg segment, in which
-    // case the pointer argument may be missing and we use null.
-    B.buildConstant(DstReg, 0);
+    if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
+      // The intrinsic may appear when we have a 0 sized kernarg segment, in which
+      // case the pointer argument may be missing and we use null.
+      B.buildConstant(DstReg, 0);
+      return true;
+    }
+
+    // It's undefined behavior if a function marked with the amdgpu-no-*
+    // attributes uses the corresponding intrinsic.
+    B.buildUndef(DstReg);
     return true;
   }
 

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1fa1dc0349e95..7e5a33d816198 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1820,11 +1820,16 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
 
   std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
   if (!Reg) {
-    // It's possible for a kernarg intrinsic call to appear in a kernel with no
-    // allocated segment, in which case we do not add the user sgpr argument, so
-    // just return null.
-    assert(PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR);
-    return DAG.getConstant(0, SDLoc(), VT);
+    if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
+      // It's possible for a kernarg intrinsic call to appear in a kernel with
+      // no allocated segment, in which case we do not add the user sgpr
+      // argument, so just return null.
+      return DAG.getConstant(0, SDLoc(), VT);
+    }
+
+    // It's undefined behavior if a function marked with the amdgpu-no-*
+    // attributes uses the corresponding intrinsic.
+    return DAG.getUNDEF(VT);
   }
 
   return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
@@ -2042,31 +2047,33 @@ void SITargetLowering::allocateSpecialInputSGPRs(
   SIMachineFunctionInfo &Info) const {
   auto &ArgInfo = Info.getArgInfo();
 
-  // TODO: Unify handling with private memory pointers.
+  // We need to allocate these in place regardless of their use.
+  const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
 
-  if (Info.hasDispatchPtr())
+  // TODO: Unify handling with private memory pointers.
+  if (IsFixed || Info.hasDispatchPtr())
     allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
 
-  if (Info.hasQueuePtr())
+  if (IsFixed || Info.hasQueuePtr())
     allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
 
   // Implicit arg ptr takes the place of the kernarg segment pointer. This is a
   // constant offset from the kernarg segment.
-  if (Info.hasImplicitArgPtr())
+  if (IsFixed || Info.hasImplicitArgPtr())
     allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
 
-  if (Info.hasDispatchID())
+  if (IsFixed || Info.hasDispatchID())
     allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
 
   // flat_scratch_init is not applicable for non-kernel functions.
 
-  if (Info.hasWorkGroupIDX())
+  if (IsFixed || Info.hasWorkGroupIDX())
     allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
 
-  if (Info.hasWorkGroupIDY())
+  if (IsFixed || Info.hasWorkGroupIDY())
     allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
 
-  if (Info.hasWorkGroupIDZ())
+  if (IsFixed || Info.hasWorkGroupIDZ())
     allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
 }
 
@@ -2766,21 +2773,28 @@ void SITargetLowering::passSpecialInputs(
   // TODO: Unify with private memory register handling. This is complicated by
   // the fact that at least in kernels, the input argument is not necessarily
   // in the same location as the input.
-  AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
-    AMDGPUFunctionArgInfo::DISPATCH_PTR,
-    AMDGPUFunctionArgInfo::QUEUE_PTR,
-    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
-    AMDGPUFunctionArgInfo::DISPATCH_ID,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+  static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
+                             StringLiteral> ImplicitAttrs[] = {
+    {AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+    {AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
+    {AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+    {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
+    {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+    {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+    {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}
   };
 
-  for (auto InputID : InputRegs) {
+  for (auto Attr : ImplicitAttrs) {
     const ArgDescriptor *OutgoingArg;
     const TargetRegisterClass *ArgRC;
     LLT ArgTy;
 
+    AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
+
+    // If the callee does not use the input, skip copying the value.
+    if (CLI.CB->hasFnAttr(Attr.second))
+      continue;
+
     std::tie(OutgoingArg, ArgRC, ArgTy) =
         CalleeArgInfo->getPreloadedValue(InputID);
     if (!OutgoingArg)
@@ -2846,11 +2860,17 @@ void SITargetLowering::passSpecialInputs(
   SDValue InputReg;
   SDLoc SL;
 
+  const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+  const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+  const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
   // If incoming ids are not packed we need to pack them.
-  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
+  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+      NeedWorkItemIDX)
     InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
 
-  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+      NeedWorkItemIDY) {
     SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
     Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
                     DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2858,7 +2878,8 @@ void SITargetLowering::passSpecialInputs(
                  DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
   }
 
-  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+      NeedWorkItemIDZ) {
     SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
     Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
                     DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2866,7 +2887,7 @@ void SITargetLowering::passSpecialInputs(
                  DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
   }
 
-  if (!InputReg.getNode()) {
+  if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
     // Workitem ids are already packed, any of present incoming arguments
     // will carry all required fields.
     ArgDescriptor IncomingArg = ArgDescriptor::createArg(
@@ -2877,13 +2898,17 @@ void SITargetLowering::passSpecialInputs(
   }
 
   if (OutgoingArg->isRegister()) {
-    RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+    if (InputReg)
+      RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+
     CCInfo.AllocateReg(OutgoingArg->getRegister());
   } else {
     unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
-    SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
-                                            SpecialArgOffset);
-    MemOpChains.push_back(ArgStore);
+    if (InputReg) {
+      SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+                                              SpecialArgOffset);
+      MemOpChains.push_back(ArgStore);
+    }
   }
 }
 
@@ -5292,9 +5317,18 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr(
   MachineFunction &MF = DAG.getMachineFunction();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   Register UserSGPR = Info->getQueuePtrUserSGPR();
-  assert(UserSGPR != AMDGPU::NoRegister);
-  SDValue QueuePtr = CreateLiveInRegister(
-    DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+
+  SDValue QueuePtr;
+  if (UserSGPR == AMDGPU::NoRegister) {
+    // We are probably in a function incorrectly marked with the
+    // amdgpu-no-queue-ptr attribute. This is undefined behavior. We don't want
+    // to delete the trap, so just use a null pointer.
+    QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+  } else {
+    QueuePtr = CreateLiveInRegister(
+      DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+  }
+
   SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
   SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
                                    QueuePtr, SDValue());
@@ -5371,7 +5405,11 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
   MachineFunction &MF = DAG.getMachineFunction();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   Register UserSGPR = Info->getQueuePtrUserSGPR();
-  assert(UserSGPR != AMDGPU::NoRegister);
+  if (UserSGPR == AMDGPU::NoRegister) {
+    // We are probably in a function incorrectly marked with the
+    // amdgpu-no-queue-ptr attribute. This is undefined behavior.
+    return DAG.getUNDEF(MVT::i32);
+  }
 
   SDValue QueuePtr = CreateLiveInRegister(
     DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index c3c5b161001d9..b5e53eff2dbf1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,47 +9,34 @@ declare hidden void @extern()
 define amdgpu_kernel void @kernel_call_no_workitem_ids() {
   ; CHECK-LABEL: name: kernel_call_no_workitem_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
   ; CHECK:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
   ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY7]](p4)
   ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY14]](s32)
-  ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+  ; CHECK:   $sgpr12 = COPY [[COPY9]](s32)
+  ; CHECK:   $sgpr13 = COPY [[COPY10]](s32)
+  ; CHECK:   $sgpr14 = COPY [[COPY11]](s32)
+  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK:   S_ENDPGM 0
   call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
@@ -59,47 +46,38 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
 define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
   ; CHECK-LABEL: name: kernel_call_no_workgroup_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
   ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
   ; CHECK:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
   ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32)
+  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK:   [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY7]](p4)
   ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY14]](s32)
+  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY8]](s64)
   ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK:   S_ENDPGM 0
   call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -109,47 +87,29 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
 define amdgpu_kernel void @kernel_call_no_other_sgprs() {
   ; CHECK-LABEL: name: kernel_call_no_other_sgprs
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2
   ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
   ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
   ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
   ; CHECK:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
   ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
+  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]]
+  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
   ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
+  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK:   [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
   ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY12]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY14]](s32)
   ; CHECK:   $vgpr31 = COPY [[OR1]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK:   S_ENDPGM 0
   call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
@@ -159,40 +119,37 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
 define void @func_call_no_workitem_ids() {
   ; CHECK-LABEL: name: func_call_no_workitem_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]]
+  ; CHECK:   [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>)
+  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+  ; CHECK:   $sgpr12 = COPY [[COPY12]](s32)
+  ; CHECK:   $sgpr13 = COPY [[COPY13]](s32)
+  ; CHECK:   $sgpr14 = COPY [[COPY14]](s32)
+  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY18]]
+  ; CHECK:   [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
+  ; CHECK:   S_SETPC_B64_return [[COPY16]]
   call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
   ret void
 }
@@ -200,40 +157,31 @@ define void @func_call_no_workitem_ids() {
 define void @func_call_no_workgroup_ids() {
   ; CHECK-LABEL: name: func_call_no_workgroup_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK:   liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
   ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]]
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]]
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]]
+  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>)
+  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY8]](p4)
+  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+  ; CHECK:   $vgpr31 = COPY [[COPY10]](s32)
+  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY18]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
+  ; CHECK:   S_SETPC_B64_return [[COPY12]]
   call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
   ret void
 }
@@ -241,40 +189,22 @@ define void @func_call_no_workgroup_ids() {
 define void @func_call_no_other_sgprs() {
   ; CHECK-LABEL: name: func_call_no_other_sgprs
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK:   liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31
   ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
-  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
-  ; CHECK:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
-  ; CHECK:   $sgpr12 = COPY [[COPY13]](s32)
-  ; CHECK:   $sgpr13 = COPY [[COPY14]](s32)
-  ; CHECK:   $sgpr14 = COPY [[COPY15]](s32)
-  ; CHECK:   $vgpr31 = COPY [[COPY16]](s32)
-  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]]
+  ; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+  ; CHECK:   $sgpr8_sgpr9 = COPY [[COPY3]](p4)
+  ; CHECK:   $vgpr31 = COPY [[COPY4]](s32)
+  ; CHECK:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
   ; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK:   [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
-  ; CHECK:   S_SETPC_B64_return [[COPY18]]
+  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY6]]
   call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 2d1e8e4c371c6..c5a1eb8b6a10b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -43,41 +43,17 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 {
 define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY15]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY19]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   ret i32 %ret
@@ -86,46 +62,22 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY15]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY19]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -137,46 +89,22 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY15]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY19]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -188,41 +116,17 @@ entry:
 define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY15]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY19]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   ret void
@@ -232,17 +136,8 @@ entry:
 define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: kernel_call_i32_fastcc_i32_i32_unused_result
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; GCN:   liveins: $sgpr8_sgpr9
+  ; GCN:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; GCN:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
@@ -253,38 +148,12 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
   ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64)
   ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
-  ; GCN:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64)
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; GCN:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; GCN:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32)
-  ; GCN:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
-  ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN:   $vgpr0 = COPY [[EVEC]](s32)
   ; GCN:   $vgpr1 = COPY [[EVEC1]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY14]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr31 = COPY [[OR1]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
+  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN:   S_ENDPGM 0
 entry:
@@ -314,52 +183,28 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)*
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C]](s32)
+  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
-  ; GCN:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY15]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY19]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
+  ; GCN:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN:   ADJCALLSTACKDOWN 0, 4, implicit-def $scc
-  ; GCN:   $vgpr0 = COPY [[COPY22]](s32)
-  ; GCN:   [[COPY23:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY11]]
-  ; GCN:   S_SETPC_B64_return [[COPY23]], implicit $vgpr0
+  ; GCN:   $vgpr0 = COPY [[COPY6]](s32)
+  ; GCN:   [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; GCN:   S_SETPC_B64_return [[COPY7]], implicit $vgpr0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval)
   ret i32 %ret
@@ -371,77 +216,53 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
   ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
   ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
   ; GCN:   [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
   ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
   ; GCN:   G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*))
   ret i32 %ret
@@ -506,110 +327,86 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
   ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
   ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
   ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
   ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY10]](s32)
-  ; GCN:   $vgpr3 = COPY [[COPY11]](s32)
-  ; GCN:   $vgpr4 = COPY [[COPY12]](s32)
-  ; GCN:   $vgpr5 = COPY [[COPY13]](s32)
-  ; GCN:   $vgpr6 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr7 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr8 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr9 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr10 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr11 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr12 = COPY [[COPY20]](s32)
-  ; GCN:   $vgpr13 = COPY [[COPY21]](s32)
-  ; GCN:   $vgpr14 = COPY [[COPY22]](s32)
-  ; GCN:   $vgpr15 = COPY [[COPY23]](s32)
-  ; GCN:   $vgpr16 = COPY [[COPY24]](s32)
-  ; GCN:   $vgpr17 = COPY [[COPY25]](s32)
-  ; GCN:   $vgpr18 = COPY [[COPY26]](s32)
-  ; GCN:   $vgpr19 = COPY [[COPY27]](s32)
-  ; GCN:   $vgpr20 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr21 = COPY [[COPY29]](s32)
-  ; GCN:   $vgpr22 = COPY [[COPY30]](s32)
-  ; GCN:   $vgpr23 = COPY [[COPY31]](s32)
-  ; GCN:   $vgpr24 = COPY [[COPY32]](s32)
-  ; GCN:   $vgpr25 = COPY [[COPY33]](s32)
-  ; GCN:   $vgpr26 = COPY [[COPY34]](s32)
-  ; GCN:   $vgpr27 = COPY [[COPY35]](s32)
-  ; GCN:   $vgpr28 = COPY [[COPY36]](s32)
-  ; GCN:   $vgpr29 = COPY [[COPY37]](s32)
-  ; GCN:   $vgpr30 = COPY [[COPY38]](s32)
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   $vgpr2 = COPY [[COPY2]](s32)
+  ; GCN:   $vgpr3 = COPY [[COPY3]](s32)
+  ; GCN:   $vgpr4 = COPY [[COPY4]](s32)
+  ; GCN:   $vgpr5 = COPY [[COPY5]](s32)
+  ; GCN:   $vgpr6 = COPY [[COPY6]](s32)
+  ; GCN:   $vgpr7 = COPY [[COPY7]](s32)
+  ; GCN:   $vgpr8 = COPY [[COPY8]](s32)
+  ; GCN:   $vgpr9 = COPY [[COPY9]](s32)
+  ; GCN:   $vgpr10 = COPY [[COPY10]](s32)
+  ; GCN:   $vgpr11 = COPY [[COPY11]](s32)
+  ; GCN:   $vgpr12 = COPY [[COPY12]](s32)
+  ; GCN:   $vgpr13 = COPY [[COPY13]](s32)
+  ; GCN:   $vgpr14 = COPY [[COPY14]](s32)
+  ; GCN:   $vgpr15 = COPY [[COPY15]](s32)
+  ; GCN:   $vgpr16 = COPY [[COPY16]](s32)
+  ; GCN:   $vgpr17 = COPY [[COPY17]](s32)
+  ; GCN:   $vgpr18 = COPY [[COPY18]](s32)
+  ; GCN:   $vgpr19 = COPY [[COPY19]](s32)
+  ; GCN:   $vgpr20 = COPY [[COPY20]](s32)
+  ; GCN:   $vgpr21 = COPY [[COPY21]](s32)
+  ; GCN:   $vgpr22 = COPY [[COPY22]](s32)
+  ; GCN:   $vgpr23 = COPY [[COPY23]](s32)
+  ; GCN:   $vgpr24 = COPY [[COPY24]](s32)
+  ; GCN:   $vgpr25 = COPY [[COPY25]](s32)
+  ; GCN:   $vgpr26 = COPY [[COPY26]](s32)
+  ; GCN:   $vgpr27 = COPY [[COPY27]](s32)
+  ; GCN:   $vgpr28 = COPY [[COPY28]](s32)
+  ; GCN:   $vgpr29 = COPY [[COPY29]](s32)
+  ; GCN:   $vgpr30 = COPY [[COPY30]](s32)
   ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
   ; GCN:   G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
   ; GCN:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
   ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c)
   ret i32 %ret
@@ -618,115 +415,91 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
   ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
   ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
   ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
   ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY10]](s32)
-  ; GCN:   $vgpr3 = COPY [[COPY11]](s32)
-  ; GCN:   $vgpr4 = COPY [[COPY12]](s32)
-  ; GCN:   $vgpr5 = COPY [[COPY13]](s32)
-  ; GCN:   $vgpr6 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr7 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr8 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr9 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr10 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr11 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr12 = COPY [[COPY20]](s32)
-  ; GCN:   $vgpr13 = COPY [[COPY21]](s32)
-  ; GCN:   $vgpr14 = COPY [[COPY22]](s32)
-  ; GCN:   $vgpr15 = COPY [[COPY23]](s32)
-  ; GCN:   $vgpr16 = COPY [[COPY24]](s32)
-  ; GCN:   $vgpr17 = COPY [[COPY25]](s32)
-  ; GCN:   $vgpr18 = COPY [[COPY26]](s32)
-  ; GCN:   $vgpr19 = COPY [[COPY27]](s32)
-  ; GCN:   $vgpr20 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr21 = COPY [[COPY29]](s32)
-  ; GCN:   $vgpr22 = COPY [[COPY30]](s32)
-  ; GCN:   $vgpr23 = COPY [[COPY31]](s32)
-  ; GCN:   $vgpr24 = COPY [[COPY32]](s32)
-  ; GCN:   $vgpr25 = COPY [[COPY33]](s32)
-  ; GCN:   $vgpr26 = COPY [[COPY34]](s32)
-  ; GCN:   $vgpr27 = COPY [[COPY35]](s32)
-  ; GCN:   $vgpr28 = COPY [[COPY36]](s32)
-  ; GCN:   $vgpr29 = COPY [[COPY37]](s32)
-  ; GCN:   $vgpr30 = COPY [[COPY38]](s32)
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   $vgpr2 = COPY [[COPY2]](s32)
+  ; GCN:   $vgpr3 = COPY [[COPY3]](s32)
+  ; GCN:   $vgpr4 = COPY [[COPY4]](s32)
+  ; GCN:   $vgpr5 = COPY [[COPY5]](s32)
+  ; GCN:   $vgpr6 = COPY [[COPY6]](s32)
+  ; GCN:   $vgpr7 = COPY [[COPY7]](s32)
+  ; GCN:   $vgpr8 = COPY [[COPY8]](s32)
+  ; GCN:   $vgpr9 = COPY [[COPY9]](s32)
+  ; GCN:   $vgpr10 = COPY [[COPY10]](s32)
+  ; GCN:   $vgpr11 = COPY [[COPY11]](s32)
+  ; GCN:   $vgpr12 = COPY [[COPY12]](s32)
+  ; GCN:   $vgpr13 = COPY [[COPY13]](s32)
+  ; GCN:   $vgpr14 = COPY [[COPY14]](s32)
+  ; GCN:   $vgpr15 = COPY [[COPY15]](s32)
+  ; GCN:   $vgpr16 = COPY [[COPY16]](s32)
+  ; GCN:   $vgpr17 = COPY [[COPY17]](s32)
+  ; GCN:   $vgpr18 = COPY [[COPY18]](s32)
+  ; GCN:   $vgpr19 = COPY [[COPY19]](s32)
+  ; GCN:   $vgpr20 = COPY [[COPY20]](s32)
+  ; GCN:   $vgpr21 = COPY [[COPY21]](s32)
+  ; GCN:   $vgpr22 = COPY [[COPY22]](s32)
+  ; GCN:   $vgpr23 = COPY [[COPY23]](s32)
+  ; GCN:   $vgpr24 = COPY [[COPY24]](s32)
+  ; GCN:   $vgpr25 = COPY [[COPY25]](s32)
+  ; GCN:   $vgpr26 = COPY [[COPY26]](s32)
+  ; GCN:   $vgpr27 = COPY [[COPY27]](s32)
+  ; GCN:   $vgpr28 = COPY [[COPY28]](s32)
+  ; GCN:   $vgpr29 = COPY [[COPY29]](s32)
+  ; GCN:   $vgpr30 = COPY [[COPY30]](s32)
   ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
   ; GCN:   G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
   ; GCN:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
   ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -741,31 +514,15 @@ entry:
 define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
   ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
   ; GCN:   $vgpr2 = COPY [[C]](s32)
   ; GCN:   $vgpr3 = COPY [[C]](s32)
   ; GCN:   $vgpr4 = COPY [[C]](s32)
@@ -795,32 +552,24 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
   ; GCN:   $vgpr28 = COPY [[C]](s32)
   ; GCN:   $vgpr29 = COPY [[C]](s32)
   ; GCN:   $vgpr30 = COPY [[C]](s32)
-  ; GCN:   [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32
+  ; GCN:   [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32)
+  ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
   ; GCN:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C2]](s32)
+  ; GCN:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
   ; GCN:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32)
+  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY14]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY15]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY18]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN:   ADJCALLSTACKDOWN 0, 12, implicit-def $scc
-  ; GCN:   $vgpr0 = COPY [[COPY21]](s32)
-  ; GCN:   [[COPY22:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
-  ; GCN:   S_SETPC_B64_return [[COPY22]], implicit $vgpr0
+  ; GCN:   $vgpr0 = COPY [[COPY5]](s32)
+  ; GCN:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; GCN:   S_SETPC_B64_return [[COPY6]], implicit $vgpr0
 entry:
   %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer)
   ret i32 %ret
@@ -830,67 +579,27 @@ entry:
 define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
   ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
-  ; GCN:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY12]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY13]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY14]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY15]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY16]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY17]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY19]](s32)
-  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
+  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
   ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; GCN:   [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
-  ; GCN:   [[COPY22:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY23:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY24:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY21]](s32)
-  ; GCN:   [[COPY30:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY30]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY22]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY23]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY24]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY25]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY26]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY27]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY29]](s32)
-  ; GCN:   SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   $vgpr2 = COPY [[COPY5]](s32)
+  ; GCN:   [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b)
   %ret = tail call fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %other.call)
@@ -902,115 +611,91 @@ entry:
 define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
   ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
   ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
   ; GCN:   [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
   ; GCN:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN:   [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
-  ; GCN:   $vgpr2 = COPY [[COPY10]](s32)
-  ; GCN:   $vgpr3 = COPY [[COPY11]](s32)
-  ; GCN:   $vgpr4 = COPY [[COPY12]](s32)
-  ; GCN:   $vgpr5 = COPY [[COPY13]](s32)
-  ; GCN:   $vgpr6 = COPY [[COPY14]](s32)
-  ; GCN:   $vgpr7 = COPY [[COPY15]](s32)
-  ; GCN:   $vgpr8 = COPY [[COPY16]](s32)
-  ; GCN:   $vgpr9 = COPY [[COPY17]](s32)
-  ; GCN:   $vgpr10 = COPY [[COPY18]](s32)
-  ; GCN:   $vgpr11 = COPY [[COPY19]](s32)
-  ; GCN:   $vgpr12 = COPY [[COPY20]](s32)
-  ; GCN:   $vgpr13 = COPY [[COPY21]](s32)
-  ; GCN:   $vgpr14 = COPY [[COPY22]](s32)
-  ; GCN:   $vgpr15 = COPY [[COPY23]](s32)
-  ; GCN:   $vgpr16 = COPY [[COPY24]](s32)
-  ; GCN:   $vgpr17 = COPY [[COPY25]](s32)
-  ; GCN:   $vgpr18 = COPY [[COPY26]](s32)
-  ; GCN:   $vgpr19 = COPY [[COPY27]](s32)
-  ; GCN:   $vgpr20 = COPY [[COPY28]](s32)
-  ; GCN:   $vgpr21 = COPY [[COPY29]](s32)
-  ; GCN:   $vgpr22 = COPY [[COPY30]](s32)
-  ; GCN:   $vgpr23 = COPY [[COPY31]](s32)
-  ; GCN:   $vgpr24 = COPY [[COPY32]](s32)
-  ; GCN:   $vgpr25 = COPY [[COPY33]](s32)
-  ; GCN:   $vgpr26 = COPY [[COPY34]](s32)
-  ; GCN:   $vgpr27 = COPY [[COPY35]](s32)
-  ; GCN:   $vgpr28 = COPY [[COPY36]](s32)
-  ; GCN:   $vgpr29 = COPY [[COPY37]](s32)
-  ; GCN:   $vgpr30 = COPY [[COPY38]](s32)
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
+  ; GCN:   $vgpr2 = COPY [[COPY2]](s32)
+  ; GCN:   $vgpr3 = COPY [[COPY3]](s32)
+  ; GCN:   $vgpr4 = COPY [[COPY4]](s32)
+  ; GCN:   $vgpr5 = COPY [[COPY5]](s32)
+  ; GCN:   $vgpr6 = COPY [[COPY6]](s32)
+  ; GCN:   $vgpr7 = COPY [[COPY7]](s32)
+  ; GCN:   $vgpr8 = COPY [[COPY8]](s32)
+  ; GCN:   $vgpr9 = COPY [[COPY9]](s32)
+  ; GCN:   $vgpr10 = COPY [[COPY10]](s32)
+  ; GCN:   $vgpr11 = COPY [[COPY11]](s32)
+  ; GCN:   $vgpr12 = COPY [[COPY12]](s32)
+  ; GCN:   $vgpr13 = COPY [[COPY13]](s32)
+  ; GCN:   $vgpr14 = COPY [[COPY14]](s32)
+  ; GCN:   $vgpr15 = COPY [[COPY15]](s32)
+  ; GCN:   $vgpr16 = COPY [[COPY16]](s32)
+  ; GCN:   $vgpr17 = COPY [[COPY17]](s32)
+  ; GCN:   $vgpr18 = COPY [[COPY18]](s32)
+  ; GCN:   $vgpr19 = COPY [[COPY19]](s32)
+  ; GCN:   $vgpr20 = COPY [[COPY20]](s32)
+  ; GCN:   $vgpr21 = COPY [[COPY21]](s32)
+  ; GCN:   $vgpr22 = COPY [[COPY22]](s32)
+  ; GCN:   $vgpr23 = COPY [[COPY23]](s32)
+  ; GCN:   $vgpr24 = COPY [[COPY24]](s32)
+  ; GCN:   $vgpr25 = COPY [[COPY25]](s32)
+  ; GCN:   $vgpr26 = COPY [[COPY26]](s32)
+  ; GCN:   $vgpr27 = COPY [[COPY27]](s32)
+  ; GCN:   $vgpr28 = COPY [[COPY28]](s32)
+  ; GCN:   $vgpr29 = COPY [[COPY29]](s32)
+  ; GCN:   $vgpr30 = COPY [[COPY30]](s32)
   ; GCN:   [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
   ; GCN:   G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
   ; GCN:   G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
   ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN:   G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5
@@ -1022,46 +707,38 @@ entry:
 define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 {
   ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area
   ; GCN: bb.1.entry:
-  ; GCN:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
-  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
-  ; GCN:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; GCN:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; GCN:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; GCN:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; GCN:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; GCN:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; GCN:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7
-  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8
-  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9
-  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10
-  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11
-  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12
-  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13
-  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14
-  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15
-  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16
-  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17
-  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18
-  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19
-  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20
-  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21
-  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22
-  ; GCN:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23
-  ; GCN:   [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24
-  ; GCN:   [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25
-  ; GCN:   [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26
-  ; GCN:   [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27
-  ; GCN:   [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28
-  ; GCN:   [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29
-  ; GCN:   [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30
+  ; GCN:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31
+  ; GCN:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; GCN:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; GCN:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; GCN:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; GCN:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; GCN:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+  ; GCN:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+  ; GCN:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+  ; GCN:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+  ; GCN:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+  ; GCN:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+  ; GCN:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+  ; GCN:   [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+  ; GCN:   [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+  ; GCN:   [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+  ; GCN:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+  ; GCN:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+  ; GCN:   [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+  ; GCN:   [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+  ; GCN:   [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+  ; GCN:   [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+  ; GCN:   [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+  ; GCN:   [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+  ; GCN:   [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+  ; GCN:   [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+  ; GCN:   [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+  ; GCN:   [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+  ; GCN:   [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
   ; GCN:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
   ; GCN:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5)
   ; GCN:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
@@ -1076,7 +753,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
   ; GCN:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5)
   ; GCN:   [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
   ; GCN:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
-  ; GCN:   [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; GCN:   [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; GCN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
   ; GCN:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GCN:   [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca
@@ -1084,16 +761,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
   ; GCN:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32)
   ; GCN:   G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
   ; GCN:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
-  ; GCN:   [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; GCN:   [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]]
-  ; GCN:   [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]]
-  ; GCN:   [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]]
-  ; GCN:   [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; GCN:   [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
-  ; GCN:   [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
-  ; GCN:   [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; GCN:   $vgpr0 = COPY [[COPY8]](s32)
-  ; GCN:   $vgpr1 = COPY [[COPY9]](s32)
+  ; GCN:   $vgpr0 = COPY [[COPY]](s32)
+  ; GCN:   $vgpr1 = COPY [[COPY1]](s32)
   ; GCN:   $vgpr2 = COPY [[C1]](s32)
   ; GCN:   $vgpr3 = COPY [[C1]](s32)
   ; GCN:   $vgpr4 = COPY [[C1]](s32)
@@ -1129,17 +798,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
   ; GCN:   G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
   ; GCN:   [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
   ; GCN:   G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
-  ; GCN:   [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
-  ; GCN:   $sgpr4_sgpr5 = COPY [[COPY40]](p4)
-  ; GCN:   $sgpr6_sgpr7 = COPY [[COPY41]](p4)
-  ; GCN:   $sgpr8_sgpr9 = COPY [[COPY42]](p4)
-  ; GCN:   $sgpr10_sgpr11 = COPY [[COPY43]](s64)
-  ; GCN:   $sgpr12 = COPY [[COPY44]](s32)
-  ; GCN:   $sgpr13 = COPY [[COPY45]](s32)
-  ; GCN:   $sgpr14 = COPY [[COPY46]](s32)
-  ; GCN:   $vgpr31 = COPY [[COPY47]](s32)
-  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
+  ; GCN:   SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
 entry:
   %alloca = alloca [16 x i32], align 4, addrspace(5)
   %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5

diff  --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
new file mode 100644
index 0000000000000..72a6522ab14a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -0,0 +1,562 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=0 < %s | FileCheck -check-prefix=VARABI %s
+; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
+; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s
+
+; Test with gfx803 so that addrspacecast, llvm.amdgcn.is.shared, and
+; llvm.amdgcn.is.private require the queue ptr. Test with code object v3
+; so that llvm.trap and llvm.debugtrap also require the queue ptr.
+
+
+declare hidden void @requires_all_inputs()
+
+; This function is incorrectly marked with hints saying that the callee
+; does not require the implicit arguments. Make sure we do not crash.
+define void @parent_func_missing_inputs() #0 {
+; VARABI-LABEL: parent_func_missing_inputs:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; VARABI-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; VARABI-NEXT:    s_mov_b64 exec, s[4:5]
+; VARABI-NEXT:    v_writelane_b32 v40, s33, 2
+; VARABI-NEXT:    v_writelane_b32 v40, s30, 0
+; VARABI-NEXT:    s_mov_b32 s33, s32
+; VARABI-NEXT:    s_addk_i32 s32, 0x400
+; VARABI-NEXT:    s_getpc_b64 s[4:5]
+; VARABI-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
+; VARABI-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
+; VARABI-NEXT:    v_writelane_b32 v40, s31, 1
+; VARABI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; VARABI-NEXT:    v_readlane_b32 s4, v40, 0
+; VARABI-NEXT:    v_readlane_b32 s5, v40, 1
+; VARABI-NEXT:    s_addk_i32 s32, 0xfc00
+; VARABI-NEXT:    v_readlane_b32 s33, v40, 2
+; VARABI-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; VARABI-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; VARABI-NEXT:    s_mov_b64 exec, s[6:7]
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[4:5]
+;
+; FIXEDABI-LABEL: parent_func_missing_inputs:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    s_or_saveexec_b64 s[16:17], -1
+; FIXEDABI-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; FIXEDABI-NEXT:    s_mov_b64 exec, s[16:17]
+; FIXEDABI-NEXT:    v_writelane_b32 v40, s33, 2
+; FIXEDABI-NEXT:    v_writelane_b32 v40, s30, 0
+; FIXEDABI-NEXT:    s_mov_b32 s33, s32
+; FIXEDABI-NEXT:    s_addk_i32 s32, 0x400
+; FIXEDABI-NEXT:    s_getpc_b64 s[16:17]
+; FIXEDABI-NEXT:    s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
+; FIXEDABI-NEXT:    s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
+; FIXEDABI-NEXT:    v_writelane_b32 v40, s31, 1
+; FIXEDABI-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; FIXEDABI-NEXT:    v_readlane_b32 s4, v40, 0
+; FIXEDABI-NEXT:    v_readlane_b32 s5, v40, 1
+; FIXEDABI-NEXT:    s_addk_i32 s32, 0xfc00
+; FIXEDABI-NEXT:    v_readlane_b32 s33, v40, 2
+; FIXEDABI-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; FIXEDABI-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; FIXEDABI-NEXT:    s_mov_b64 exec, s[6:7]
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_setpc_b64 s[4:5]
+  call void @requires_all_inputs()
+  ret void
+}
+
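+; Even though this kernel carries the amdgpu-no-* hints, the callee
+; requires all inputs, so under the fixed ABI the caller still packs the
+; workitem IDs as v31 = id.x | (id.y << 10) | (id.z << 20); that is what
+; the v_lshlrev_b32/v_or_b32 sequence below computes.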
+define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
+; VARABI-LABEL: parent_kernel_missing_inputs:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_add_i32 s4, s4, s9
+; VARABI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
+; VARABI-NEXT:    s_add_u32 s0, s0, s9
+; VARABI-NEXT:    s_addc_u32 s1, s1, 0
+; VARABI-NEXT:    s_mov_b32 flat_scratch_lo, s5
+; VARABI-NEXT:    s_getpc_b64 s[4:5]
+; VARABI-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
+; VARABI-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
+; VARABI-NEXT:    s_mov_b32 s32, 0
+; VARABI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
+; VARABI-NEXT:    s_endpgm
+;
+; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
+; FIXEDABI-SDAG:       ; %bb.0:
+; FIXEDABI-SDAG-NEXT:    s_add_i32 s10, s10, s15
+; FIXEDABI-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
+; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-SDAG-NEXT:    s_add_u32 s0, s0, s15
+; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
+; FIXEDABI-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s11
+; FIXEDABI-SDAG-NEXT:    s_addc_u32 s1, s1, 0
+; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
+; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[8:9], 0
+; FIXEDABI-SDAG-NEXT:    s_getpc_b64 s[16:17]
+; FIXEDABI-SDAG-NEXT:    s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
+; FIXEDABI-SDAG-NEXT:    s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
+; FIXEDABI-SDAG-NEXT:    s_mov_b32 s32, 0
+; FIXEDABI-SDAG-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; FIXEDABI-SDAG-NEXT:    s_endpgm
+;
+; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
+; FIXEDABI-GISEL:       ; %bb.0:
+; FIXEDABI-GISEL-NEXT:    s_add_i32 s10, s10, s15
+; FIXEDABI-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
+; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-GISEL-NEXT:    s_add_u32 s0, s0, s15
+; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; FIXEDABI-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s11
+; FIXEDABI-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
+; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[8:9], 0
+; FIXEDABI-GISEL-NEXT:    s_getpc_b64 s[16:17]
+; FIXEDABI-GISEL-NEXT:    s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
+; FIXEDABI-GISEL-NEXT:    s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
+; FIXEDABI-GISEL-NEXT:    s_mov_b32 s32, 0
+; FIXEDABI-GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; FIXEDABI-GISEL-NEXT:    s_endpgm
+  call void @requires_all_inputs()
+  ret void
+}
+
+; The function is marked amdgpu-no-workitem-id-* but uses the workitem IDs anyway.
+define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_func_use_workitem_id:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    v_and_b32_e32 v3, 0x3ff, v2
+; VARABI-NEXT:    flat_store_dword v[0:1], v3
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_bfe_u32 v3, v2, 10, 10
+; VARABI-NEXT:    v_bfe_u32 v2, v2, 20, 10
+; VARABI-NEXT:    flat_store_dword v[0:1], v3
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
+; FIXEDABI-SDAG:       ; %bb.0:
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 10, 10
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 20, 10
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id:
+; FIXEDABI-GISEL:       ; %bb.0:
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
+; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v3, v31, 10, 10
+; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v4, v31, 20, 10
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v3
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v4
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store volatile i32 %id.x, i32 addrspace(1)* %ptr
+  store volatile i32 %id.y, i32 addrspace(1)* %ptr
+  store volatile i32 %id.z, i32 addrspace(1)* %ptr
+  ret void
+}
+
+; The function is marked amdgpu-no-workitem-id-* but uses the workitem IDs anyway.
+define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_kernel_use_workitem_id:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VARABI-NEXT:    s_waitcnt lgkmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v4, s1
+; VARABI-NEXT:    v_mov_b32_e32 v3, s0
+; VARABI-NEXT:    flat_store_dword v[3:4], v0
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    flat_store_dword v[3:4], v1
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    flat_store_dword v[3:4], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v4, s1
+; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s0
+; FIXEDABI-NEXT:    flat_store_dword v[3:4], v0
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    flat_store_dword v[3:4], v1
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    flat_store_dword v[3:4], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_endpgm
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store volatile i32 %id.x, i32 addrspace(1)* %ptr
+  store volatile i32 %id.y, i32 addrspace(1)* %ptr
+  store volatile i32 %id.z, i32 addrspace(1)* %ptr
+  ret void
+}
+
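+; The function is marked amdgpu-no-workgroup-id-* but reads the workgroup
+; IDs anyway (s4-s6 under VARABI, s12-s14 in the fixed ABI).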
+define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_func_use_workgroup_id:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v2, s4
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v2, s5
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v2, s6
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: marked_func_use_workgroup_id:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s12
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s13
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s14
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
+  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
+  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
+  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
+  store volatile i32 %id.x, i32 addrspace(1)* %ptr
+  store volatile i32 %id.y, i32 addrspace(1)* %ptr
+  store volatile i32 %id.z, i32 addrspace(1)* %ptr
+  ret void
+}
+
+define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_kernel_use_workgroup_id:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VARABI-NEXT:    v_mov_b32_e32 v2, s6
+; VARABI-NEXT:    s_waitcnt lgkmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v0, s0
+; VARABI-NEXT:    v_mov_b32_e32 v1, s1
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v2, s7
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v2, s8
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
+; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
+; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s7
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_endpgm
+  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
+  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
+  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
+  store volatile i32 %id.x, i32 addrspace(1)* %ptr
+  store volatile i32 %id.y, i32 addrspace(1)* %ptr
+  store volatile i32 %id.z, i32 addrspace(1)* %ptr
+  ret void
+}
+
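+; The function is marked with all of the amdgpu-no-* SGPR hints but still
+; uses the queue ptr, implicitarg ptr, dispatch ptr, and dispatch id.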
+define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_func_use_other_sgpr:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: marked_func_use_other_sgpr:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
+; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s7
+; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
+; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s9
+; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s4
+; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s5
+; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s10
+; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s11
+; FIXEDABI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
+  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
+  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
+  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
+  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
+  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
+  ret void
+}
+
+define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 {
+; VARABI-LABEL: marked_kernel_use_other_sgpr:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_add_u32 s0, s4, 8
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_addc_u32 s1, s5, 0
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v0, s0
+; VARABI-NEXT:    v_mov_b32_e32 v1, s1
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_add_u32 s0, s4, 8
+; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT:    s_addc_u32 s1, s5, 0
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
+; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
+; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT:    s_endpgm
+  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
+  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
+  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
+  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
+  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
+  ret void
+}
+
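+; With no kernel arguments and the amdgpu-no-implicitarg-ptr hint, the
+; implicitarg ptr lowers to a null pointer here.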
+define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
+; VARABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    v_mov_b32_e32 v0, 0
+; VARABI-NEXT:    v_mov_b32_e32 v1, 0
+; VARABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; VARABI-NEXT:    s_endpgm
+;
+; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    v_mov_b32_e32 v0, 0
+; FIXEDABI-NEXT:    v_mov_b32_e32 v1, 0
+; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; FIXEDABI-NEXT:    s_endpgm
+  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
+  ret void
+}
+
+; On gfx8, the queue ptr is required for this addrspacecast.
+define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 {
+; VARABI-LABEL: addrspacecast_requires_queue_ptr:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; VARABI-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; VARABI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; VARABI-NEXT:    v_mov_b32_e32 v3, 0
+; VARABI-NEXT:    v_mov_b32_e32 v4, 1
+; VARABI-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; VARABI-NEXT:    flat_store_dword v[2:3], v4
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    v_mov_b32_e32 v1, v3
+; VARABI-NEXT:    v_mov_b32_e32 v2, 2
+; VARABI-NEXT:    flat_store_dword v[0:1], v2
+; VARABI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
+; FIXEDABI-SDAG:       ; %bb.0:
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[6:7], 0x40
+; FIXEDABI-SDAG-NEXT:    s_load_dword s5, s[6:7], 0x44
+; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v2, s5
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, s4
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v0, vcc
+; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 1
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[2:3], v0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
+; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 2
+; FIXEDABI-SDAG-NEXT:    flat_store_dword v[4:5], v0
+; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
+; FIXEDABI-GISEL:       ; %bb.0:
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[6:7], 0x44
+; FIXEDABI-GISEL-NEXT:    s_load_dword s5, s[6:7], 0x40
+; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
+; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v3, s4
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
+; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, s5
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
+; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
+; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, 1
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[2:3], v4
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v2, 2
+; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
+  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
+  store volatile i32 1, i32* %flat.private
+  store volatile i32 2, i32* %flat.local
+  ret void
+}
+
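+; On gfx8, llvm.amdgcn.is.shared needs the shared aperture base loaded
+; from the queue ptr (offset 0x40).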
+define void @is_shared_requires_queue_ptr(i8* %ptr) #0 {
+; VARABI-LABEL: is_shared_requires_queue_ptr:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; VARABI-NEXT:    flat_store_dword v[0:1], v0
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    s_load_dword s4, s[6:7], 0x40
+; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
+  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
+  %zext = zext i1 %is.shared to i32
+  store volatile i32 %zext, i32 addrspace(1)* undef
+  ret void
+}
+
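+; On gfx8, llvm.amdgcn.is.private needs the private aperture base loaded
+; from the queue ptr (offset 0x44).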
+define void @is_private_requires_queue_ptr(i8* %ptr) #0 {
+; VARABI-LABEL: is_private_requires_queue_ptr:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; VARABI-NEXT:    flat_store_dword v[0:1], v0
+; VARABI-NEXT:    s_waitcnt vmcnt(0)
+; VARABI-NEXT:    s_setpc_b64 s[30:31]
+;
+; FIXEDABI-LABEL: is_private_requires_queue_ptr:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    s_load_dword s4, s[6:7], 0x44
+; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
+; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
+; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
+  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
+  %zext = zext i1 %is.private to i32
+  store volatile i32 %zext, i32 addrspace(1)* undef
+  ret void
+}
+
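+; llvm.trap expects the queue ptr in s[0:1] for the trap handler; under
+; VARABI the hint leaves no queue ptr to pass, so 0 is used instead.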
+define void @trap_requires_queue() #0 {
+; VARABI-LABEL: trap_requires_queue:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    s_mov_b64 s[0:1], 0
+; VARABI-NEXT:    s_trap 2
+;
+; FIXEDABI-LABEL: trap_requires_queue:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    s_mov_b64 s[0:1], s[6:7]
+; FIXEDABI-NEXT:    s_trap 2
+  call void @llvm.trap()
+  unreachable
+}
+
+define void @debugtrap_requires_queue() #0 {
+; VARABI-LABEL: debugtrap_requires_queue:
+; VARABI:       ; %bb.0:
+; VARABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VARABI-NEXT:    s_trap 3
+;
+; FIXEDABI-LABEL: debugtrap_requires_queue:
+; FIXEDABI:       ; %bb.0:
+; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; FIXEDABI-NEXT:    s_trap 3
+  call void @llvm.debugtrap()
+  unreachable
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.workitem.id.y()
+declare i32 @llvm.amdgcn.workitem.id.z()
+declare i32 @llvm.amdgcn.workgroup.id.x()
+declare i32 @llvm.amdgcn.workgroup.id.y()
+declare i32 @llvm.amdgcn.workgroup.id.z()
+declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
+declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+declare i64 @llvm.amdgcn.dispatch.id()
+declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+declare i1 @llvm.amdgcn.is.shared(i8*)
+declare i1 @llvm.amdgcn.is.private(i8*)
+declare void @llvm.trap()
+declare void @llvm.debugtrap()
+
+attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
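+
+; For contrast, a minimal sketch of the well-defined use of these hints
+; (illustrative names; not exercised by the checks above): the callee
+; genuinely never reads the implicit inputs, so both the function and the
+; call site may carry the amdgpu-no-* attributes and the caller is free to
+; clobber the corresponding registers.
+;
+;   define void @caller_without_ids() #1 {
+;     call void @leaf_without_ids() #1
+;     ret void
+;   }
+;   declare hidden void @leaf_without_ids() #1
+;   attributes #1 = { "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }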

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index e8021176d8fbb..968dbf20eb929 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -144,17 +144,21 @@ define hidden void @func_indirect_use_workgroup_id_x() #1 {
   ret void
 }
 
+; The argument is already in the right place, so we are free to clobber
+; the other SGPR arguments.
 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
+; GCN-NOT: s12
+; GCN-NOT: s13
+; GCN-NOT: s14
 define hidden void @func_indirect_use_workgroup_id_y() #1 {
   call void @use_workgroup_id_y()
   ret void
 }
 
 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
-; GCN-NOT: s4
-; GCN: v_readlane_b32 s4, v40, 0
+; GCN-NOT: s12
+; GCN-NOT: s13
+; GCN-NOT: s14
 define hidden void @func_indirect_use_workgroup_id_z() #1 {
   call void @use_workgroup_id_z()
   ret void

diff  --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 6a4ab5a30e180..a373442364055 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -131,10 +131,11 @@ define void @use_workitem_id_yz() #1 {
 ; VARABI: enable_vgpr_workitem_id = 0
 ; FIXEDABI: enable_vgpr_workitem_id = 2
 
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v31
+; FIXEDABI: v_mov_b32_e32 v31, v0{{$}}
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v31
 
 ; VARABI-NOT: v31
 ; GCN: s_swappc_b64
@@ -148,20 +149,18 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
 ; VARABI: enable_vgpr_workitem_id = 1
 ; FIXEDABI: enable_vgpr_workitem_id = 2
 
-; FIXEDABI-NOT: v0
-; FIXEDABI-NOT: v1
 
 ; VARABI-NOT: v31
 ; VARABI: v_lshlrev_b32_e32 v0, 10, v1
 
-
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-
 ; FIXEDABI-NOT: v0
 ; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+
 ; VARABI-NOT: v31
 
 ; GCN: s_swappc_b64
@@ -179,10 +178,11 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
 ; VARABI-NOT: v0
 ; VARABI-NOT: v1
 
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
 
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
@@ -198,10 +198,14 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
 ; VARABI-NOT: v0
 ; VARABI-NOT: v1
 
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
 
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
@@ -218,10 +222,14 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
 ; VARABI-NOT: v2
 
 
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v1, 20, v2
+; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
 
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
@@ -238,11 +246,15 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
 ; VARABI-NOT: v1
 ; VARABI-NOT: v2
 
-
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v0, 20, v2
+; FIXEDABI-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; FIXEDABI-NEXT: v_or_b32_e32 v31, v1, v0
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
 
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
@@ -348,10 +360,9 @@ define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
 ; VARABI: v_mov_b32_e32 v1, v0
 ; VARABI: v_mov_b32_e32 v0, 0x22b
 
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI: v_mov_b32_e32 v31, v0
+; FIXEDABI: v_mov_b32_e32 v0, 0x22b
 
 ; GCN: s_swappc_b64
 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
@@ -371,10 +382,12 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
 ; VARABI-NOT: v0
 
 ; FIXEDABI: enable_vgpr_workitem_id = 2
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1
+; FIXEDABI: v_mov_b32_e32 v0, 0x22b
 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
   call void @other_arg_use_workitem_id_y(i32 555)
   ret void
@@ -388,11 +401,11 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
 ; VARABI: s_swappc_b64
 ; VARABI-NOT: v0
 
-
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2
+; FIXEDABI: v_mov_b32_e32 v0, 0x22b
 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
   call void @other_arg_use_workitem_id_z(i32 555)
   ret void
@@ -462,13 +475,13 @@ define void @too_many_args_use_workitem_id_x(
 
 
 ; FIXEDABI: enable_vgpr_workitem_id = 2
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
 ; FIXEDABI-DAG: s_mov_b32 s32, 0
 ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}}
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
+; FIXEDABI-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}}
+; FIXEDABI-DAG: v_mov_b32_e32 v31, v0
 
 ; FIXEDABI: s_swappc_b64
 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
@@ -622,6 +635,10 @@ define void @too_many_args_use_workitem_id_x_byval(
 ; VARABI: s_swappc_b64
 
 
+; FIXEDABI-NOT: v0
+; FIXEDABI-NOT: v1
+; FIXEDABI-NOT: v2
+; FIXEDABI: v_mov_b32_e32 v31, v0
 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7
 ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], 0 offset:4{{$}}
 ; FIXEDABI: s_movk_i32 s32, 0x400{{$}}
@@ -632,11 +649,6 @@ define void @too_many_args_use_workitem_id_x_byval(
 ; FIXME: Why this reload?
 ; FIXEDABI: buffer_load_dword [[RELOAD:v[0-9]+]], off, s[0:3], 0 offset:4{{$}}
 
-; FIXEDABI-DAG:	v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1
-; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2
-; FIXEDABI-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]]
-; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]]
-
 ; FIXEDABI-NOT: s32
 ; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32 offset:4
 ; FIXEDABI: s_swappc_b64
@@ -885,9 +897,53 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
   ret void
 }
 
+declare hidden void @extern_hint(i32) #2
+
+; The workitem IDs should not be passed due to the attributes.
+; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints:
+; GCN-NOT: v30
+; GCN-NOT: v31
+; GCN: v_mov_b32_e32 v0, 9
+; GCN-NOT: v0
+; GCN-NOT: v31
+; GCN: s_swappc_b64
+define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 {
+  call void @extern_hint(i32 9)
+  ret void
+}
+
+; GCN-LABEL: {{^}}func_call_no_workitem_id_hints:
+; GCN-NOT: v30
+; GCN-NOT: v31
+; GCN: v_mov_b32_e32 v0, 9
+; GCN-NOT: v0
+; GCN-NOT: v31
+; GCN: s_swappc_b64
+define void @func_call_no_workitem_id_hints() #2 {
+  call void @extern_hint(i32 9)
+  ret void
+}
+
+declare hidden void @extern_nohint(i32)
+
+; Check that the hint is respected when it appears on the callsite rather
+; than on the function declaration.
+; GCN-LABEL: {{^}}kern_callsite_workitem_id_hints:
+; GCN-NOT: v30
+; GCN-NOT: v31
+; GCN: v_mov_b32_e32 v0, 9
+; GCN-NOT: v0
+; GCN-NOT: v31
+; GCN: s_swappc_b64
+define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
+  call void @extern_nohint(i32 9) #2
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @llvm.amdgcn.workitem.id.y() #0
 declare i32 @llvm.amdgcn.workitem.id.z() #0
 
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
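+
+; Note: the backend queries these hints via CallBase::hasFnAttr, which
+; returns true when the attribute is present on either the call
+; instruction or the called function, so a hint on the callsite alone is
+; sufficient (as kern_callsite_workitem_id_hints checks above).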

