[llvm-branch-commits] [llvm] [AMDGPU] Tail call support for whole wave functions (PR #145860)

Diana Picus via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Jun 27 04:59:18 PDT 2025


https://github.com/rovka updated https://github.com/llvm/llvm-project/pull/145860

>From 6ce379c3c2ecc75932d0e98d4defca7724ebd6a8 Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Thu, 5 Jun 2025 13:05:45 +0200
Subject: [PATCH 1/2] [AMDGPU] Tail call support for whole wave functions

Support tail calls to whole wave functions (trivial) and from whole wave
functions (slightly more involved because we need a new pseudo for the
tail call return that patches up the EXEC mask).

Move the expansion of whole wave function return pseudos (regular and
tail call returns) to prolog epilog insertion, since that's where we
patch up the EXEC mask.

Unnecessary register spills will be dealt with in a future patch.
---
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   52 +-
 .../SelectionDAG/SelectionDAGBuilder.h        |    4 +
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp |   35 +-
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |    1 +
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |    1 +
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td     |    4 +
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    |   20 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |    6 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |    1 -
 llvm/lib/Target/AMDGPU/SIInstructions.td      |   27 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |    1 +
 .../CodeGen/AMDGPU/amdgcn-call-whole-wave.ll  |  666 +++++++
 .../AMDGPU/whole-wave-functions-pei.mir       |   18 +-
 .../CodeGen/AMDGPU/whole-wave-functions.ll    | 1675 +++++++++++++++++
 14 files changed, 2470 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2310d511b1df8..0e27b0f764795 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7977,6 +7977,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
   }
   case Intrinsic::amdgcn_call_whole_wave: {
     TargetLowering::ArgListTy Args;
+    bool isTailCall = I.isTailCall();
 
     // The first argument is the callee. Skip it when assembling the call args.
     TargetLowering::ArgListEntry Arg;
@@ -7984,6 +7985,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
       Arg.Node = getValue(I.getArgOperand(Idx));
       Arg.Ty = I.getArgOperand(Idx)->getType();
       Arg.setAttributes(&I, Idx);
+
+      if (Arg.IsSRet && isa<Instruction>(I.getArgOperand(Idx)))
+        isTailCall = false;
+
       Args.push_back(Arg);
     }
 
@@ -7998,7 +8003,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
         .setChain(getRoot())
         .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
                    getValue(I.getArgOperand(0)), std::move(Args))
-        .setTailCall(false)
+        .setTailCall(isTailCall && canTailCall(I))
         .setIsPreallocated(
             I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
         .setConvergent(I.isConvergent())
@@ -8879,6 +8884,29 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
   return Result;
 }
 
+bool SelectionDAGBuilder::canTailCall(const CallBase &CB) const {
+  bool isMustTailCall = CB.isMustTailCall();
+
+  // Avoid emitting tail calls in functions with the disable-tail-calls
+  // attribute.
+  auto *Caller = CB.getParent()->getParent();
+  if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
+          "true" &&
+      !isMustTailCall)
+    return false;
+
+  // We can't tail call inside a function with a swifterror argument. Lowering
+  // does not support this yet. It would have to move into the swifterror
+  // register before the call.
+  if (DAG.getTargetLoweringInfo().supportSwiftError() &&
+      Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+    return false;
+
+  // Check if target-independent constraints permit a tail call here.
+  // Target-dependent constraints are checked within TLI->LowerCallTo.
+  return isInTailCallPosition(CB, DAG.getTarget());
+}
+
 void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
                                       bool isTailCall, bool isMustTailCall,
                                       const BasicBlock *EHPadBB,
@@ -8893,21 +8921,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
   const Value *SwiftErrorVal = nullptr;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
-  if (isTailCall) {
-    // Avoid emitting tail calls in functions with the disable-tail-calls
-    // attribute.
-    auto *Caller = CB.getParent()->getParent();
-    if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
-        "true" && !isMustTailCall)
-      isTailCall = false;
-
-    // We can't tail call inside a function with a swifterror argument. Lowering
-    // does not support this yet. It would have to move into the swifterror
-    // register before the call.
-    if (TLI.supportSwiftError() &&
-        Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
-      isTailCall = false;
-  }
+  if (isTailCall)
+    isTailCall = canTailCall(CB);
 
   for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
     TargetLowering::ArgListEntry Entry;
@@ -8952,11 +8967,6 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
     Args.push_back(Entry);
   }
 
-  // Check if target-independent constraints permit a tail call here.
-  // Target-dependent constraints are checked within TLI->LowerCallTo.
-  if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
-    isTailCall = false;
-
   // Disable tail calls if there is an swifterror argument. Targets have not
   // been updated to support tail calls.
   if (TLI.supportSwiftError() && SwiftErrorVal)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 1c278076a219d..58d9f04c61156 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -408,6 +408,10 @@ class SelectionDAGBuilder {
                    bool IsMustTailCall, const BasicBlock *EHPadBB = nullptr,
                    const TargetLowering::PtrAuthInfo *PAI = nullptr);
 
+  // Check some of the target-independent constraints for tail calls. This does
+  // not iterate over the call arguments.
+  bool canTailCall(const CallBase &CB) const;
+
   // Lower range metadata from 0 to N to assert zext to an integer of nearest
   // floor power of two.
   SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 9488cccf8fe5c..1df6eb79763b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -993,8 +993,14 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
     return IsWave32 ? AMDGPU::SI_CS_CHAIN_TC_W32 : AMDGPU::SI_CS_CHAIN_TC_W64;
   }
 
-  return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
-                                         AMDGPU::SI_TCRETURN;
+  if (CallerF.getFunction().getCallingConv() ==
+      CallingConv::AMDGPU_Gfx_WholeWave)
+    return AMDGPU::SI_TCRETURN_GFX_WholeWave;
+
+  if (CC == CallingConv::AMDGPU_Gfx || CC == CallingConv::AMDGPU_Gfx_WholeWave)
+    return AMDGPU::SI_TCRETURN_GFX;
+
+  return AMDGPU::SI_TCRETURN;
 }
 
 // Add operands to call instruction to track the callee.
@@ -1273,6 +1279,13 @@ bool AMDGPUCallLowering::lowerTailCall(
   unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), /*IsTailCall*/ true,
                                ST.isWave32(), CalleeCC, IsDynamicVGPRChainCall);
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
+
+  if (FuncInfo->isWholeWaveFunction())
+    addOriginalExecToReturn(MF, MIB);
+
+  // Keep track of the index of the next operand to be added to the call
+  unsigned CalleeIdx = MIB->getNumOperands();
+
   if (!addCallTargetOperands(MIB, MIRBuilder, Info, IsDynamicVGPRChainCall))
     return false;
 
@@ -1390,7 +1403,7 @@ bool AMDGPUCallLowering::lowerTailCall(
   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
   // sequence start and end here.
   if (!IsSibCall) {
-    MIB->getOperand(1).setImm(FPDiff);
+    MIB->getOperand(CalleeIdx + 1).setImm(FPDiff);
     CallSeqStart.addImm(NumBytes).addImm(0);
     // End the call sequence *before* emitting the call. Normally, we would
     // tidy the frame up after the call. However, here, we've laid out the
@@ -1402,16 +1415,24 @@ bool AMDGPUCallLowering::lowerTailCall(
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);
 
+  // If this is a whole wave tail call, we need to constrain the register for
+  // the original EXEC.
+  if (MIB->getOpcode() == AMDGPU::SI_TCRETURN_GFX_WholeWave) {
+    MIB->getOperand(0).setReg(
+        constrainOperandRegClass(MF, *TRI, MRI, *TII, *ST.getRegBankInfo(),
+                                 *MIB, MIB->getDesc(), MIB->getOperand(0), 0));
+  }
+
   // If Callee is a reg, since it is used by a target specific
   // instruction, it must have a register class matching the
   // constraint of that instruction.
 
   // FIXME: We should define regbankselectable call instructions to handle
   // divergent call targets.
-  if (MIB->getOperand(0).isReg()) {
-    MIB->getOperand(0).setReg(
-        constrainOperandRegClass(MF, *TRI, MRI, *TII, *ST.getRegBankInfo(),
-                                 *MIB, MIB->getDesc(), MIB->getOperand(0), 0));
+  if (MIB->getOperand(CalleeIdx).isReg()) {
+    MIB->getOperand(CalleeIdx).setReg(constrainOperandRegClass(
+        MF, *TRI, MRI, *TII, *ST.getRegBankInfo(), *MIB, MIB->getDesc(),
+        MIB->getOperand(CalleeIdx), CalleeIdx));
   }
 
   MF.getFrameInfo().setHasTailCall();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 0421ed87e61f4..389c7c1ea95a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5639,6 +5639,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CALL)
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(TC_RETURN_GFX)
+  NODE_NAME_CASE(TC_RETURN_GFX_WholeWave)
   NODE_NAME_CASE(TC_RETURN_CHAIN)
   NODE_NAME_CASE(TC_RETURN_CHAIN_DVGPR)
   NODE_NAME_CASE(TRAP)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 5716711de3402..120fa819e8a55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -402,6 +402,7 @@ enum NodeType : unsigned {
   CALL,
   TC_RETURN,
   TC_RETURN_GFX,
+  TC_RETURN_GFX_WholeWave,
   TC_RETURN_CHAIN,
   TC_RETURN_CHAIN_DVGPR,
   TRAP,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index e305f08925cc6..b8fa6f3fc6867 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -94,6 +94,10 @@ def AMDGPUtc_return_gfx: SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP,
 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
 >;
 
+def AMDGPUtc_return_gfx_ww: SDNode<"AMDGPUISD::TC_RETURN_GFX_WholeWave", AMDGPUTCReturnTP,
+[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
+
 def AMDGPUtc_return_chain: SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
   SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index b88df50c6c999..3caeda651f96b 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1125,9 +1125,18 @@ void SIFrameLowering::emitCSRSpillRestores(
     RestoreWWMRegisters(WWMCalleeSavedRegs);
 
     // The original EXEC is the first operand of the return instruction.
-    const MachineInstr &Return = MBB.instr_back();
-    assert(Return.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN &&
-           "Unexpected return inst");
+    MachineInstr &Return = MBB.instr_back();
+    unsigned Opcode = Return.getOpcode();
+    switch (Opcode) {
+    case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
+      Opcode = AMDGPU::SI_RETURN;
+      break;
+    case AMDGPU::SI_TCRETURN_GFX_WholeWave:
+      Opcode = AMDGPU::SI_TCRETURN_GFX;
+      break;
+    default:
+      llvm_unreachable("Unexpected return inst");
+    }
     Register OrigExec = Return.getOperand(0).getReg();
 
     if (!WWMScratchRegs.empty()) {
@@ -1141,6 +1150,11 @@ void SIFrameLowering::emitCSRSpillRestores(
     // Restore original EXEC.
     unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addReg(OrigExec);
+
+    // Drop the first operand and update the opcode.
+    Return.removeOperand(0);
+    Return.setDesc(TII->get(Opcode));
+
     return;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eec2dbcd2dd4a..cff04e38f3cd3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4131,6 +4131,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
       break;
     }
 
+    // If the caller is a whole wave function, we need to use a special opcode
+    // so we can patch up EXEC.
+    if (Info->isWholeWaveFunction())
+      OPC = AMDGPUISD::TC_RETURN_GFX_WholeWave;
+
     return DAG.getNode(OPC, DL, MVT::Other, Ops);
   }
 
@@ -5872,6 +5877,7 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     MI.eraseFromParent();
     return SplitBB;
   }
+  case AMDGPU::SI_TCRETURN_GFX_WholeWave:
   case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN: {
     assert(MFI->isWholeWaveFunction());
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fc469f19c7808..96ad3168c2b41 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2515,7 +2515,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
     break;
   }
-  case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
   case AMDGPU::SI_RETURN: {
     const MachineFunction *MF = MBB.getParent();
     const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 225a073db33d1..ca77573311ffa 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -670,6 +670,33 @@ def SI_WHOLE_WAVE_FUNC_RETURN : SPseudoInstSI <
 def : GCNPat<
   (AMDGPUwhole_wave_return), (SI_WHOLE_WAVE_FUNC_RETURN (i1 (IMPLICIT_DEF)))>;
 
+// Restores the previous EXEC and otherwise behaves entirely like a SI_TCRETURN.
+// This is used for tail calls *from* a whole wave function. Tail calls to
+// a whole wave function may use the usual opcodes, depending on the calling
+// convention of the caller.
+def SI_TCRETURN_GFX_WholeWave : SPseudoInstSI <
+  (outs),
+  (ins SReg_1:$orig_exec, Gfx_CCR_SGPR_64:$src0, unknown:$callee, i32imm:$fpdiff)> {
+  let isCall = 1;
+  let isTerminator = 1;
+  let isReturn = 1;
+  let isBarrier = 1;
+  let UseNamedOperandTable = 1;
+  let SchedRW = [WriteBranch];
+  let isConvergent = 1;
+
+  // We're going to use custom handling to set the $orig_exec to the correct value.
+  let usesCustomInserter = 1;
+}
+
+// Generate a SI_TCRETURN_GFX_WholeWave pseudo with a placeholder for its
+// argument. It will be filled in by the custom inserter.
+def : GCNPat<
+  (AMDGPUtc_return_gfx_ww i64:$src0, tglobaladdr:$callee, i32:$fpdiff),
+  (SI_TCRETURN_GFX_WholeWave (i1 (IMPLICIT_DEF)), Gfx_CCR_SGPR_64:$src0,
+   tglobaladdr:$callee, i32:$fpdiff)>;
+
+
 // Return for returning shaders to a shader variant epilog.
 def SI_RETURN_TO_EPILOG : SPseudoInstSI <
   (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index e6af1ecc8db77..39a1d24a2a42c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1417,6 +1417,7 @@ constexpr bool mayTailCallThisCC(CallingConv::ID CC) {
   switch (CC) {
   case CallingConv::C:
   case CallingConv::AMDGPU_Gfx:
+  case CallingConv::AMDGPU_Gfx_WholeWave:
     return true;
   default:
     return canGuaranteeTCO(CC);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
index eac0767c88d80..356bf4b3cac28 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
@@ -96,6 +96,672 @@ define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr)
   ret void
 }
 
+define amdgpu_gfx i32 @tail_call_from_gfx(i32 %x, i32 inreg %c) {
+; DAGISEL-LABEL: tail_call_from_gfx:
+; DAGISEL:       ; %bb.0:
+; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; DAGISEL-NEXT:    s_wait_expcnt 0x0
+; DAGISEL-NEXT:    s_wait_samplecnt 0x0
+; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
+; DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; DAGISEL-NEXT:    v_add_nc_u32_e32 v1, 13, v0
+; DAGISEL-NEXT:    s_mov_b32 s1, good_callee@abs32@hi
+; DAGISEL-NEXT:    s_mov_b32 s0, good_callee@abs32@lo
+; DAGISEL-NEXT:    s_wait_alu 0xfffe
+; DAGISEL-NEXT:    s_setpc_b64 s[0:1]
+;
+; GISEL-LABEL: tail_call_from_gfx:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-NEXT:    s_wait_expcnt 0x0
+; GISEL-NEXT:    s_wait_samplecnt 0x0
+; GISEL-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-NEXT:    s_wait_kmcnt 0x0
+; GISEL-NEXT:    v_add_nc_u32_e32 v1, 13, v0
+; GISEL-NEXT:    s_mov_b32 s36, good_callee@abs32@lo
+; GISEL-NEXT:    s_mov_b32 s37, good_callee@abs32@hi
+; GISEL-NEXT:    s_wait_alu 0xfffe
+; GISEL-NEXT:    s_setpc_b64 s[36:37]
+  %y = add i32 %x, 13
+  %ret = tail call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c)
+  ret i32 %ret
+}
+
+define amdgpu_gfx_whole_wave i32 @tail_call_from_whole_wave(i1 %active, i32 %x, i32 inreg %c) {
+; DAGISEL-LABEL: tail_call_from_whole_wave:
+; DAGISEL:       ; %bb.0:
+; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; DAGISEL-NEXT:    s_wait_expcnt 0x0
+; DAGISEL-NEXT:    s_wait_samplecnt 0x0
+; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
+; DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
+; DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
+; DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
+; DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; DAGISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24
+; DAGISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28
+; DAGISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32
+; DAGISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36
+; DAGISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40
+; DAGISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44
+; DAGISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48
+; DAGISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52
+; DAGISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56
+; DAGISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60
+; DAGISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64
+; DAGISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68
+; DAGISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72
+; DAGISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76
+; DAGISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80
+; DAGISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84
+; DAGISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88
+; DAGISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92
+; DAGISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96
+; DAGISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100
+; DAGISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104
+; DAGISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108
+; DAGISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112
+; DAGISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116
+; DAGISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120
+; DAGISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128
+; DAGISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132
+; DAGISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136
+; DAGISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140
+; DAGISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144
+; DAGISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148
+; DAGISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152
+; DAGISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156
+; DAGISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160
+; DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164
+; DAGISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168
+; DAGISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172
+; DAGISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176
+; DAGISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180
+; DAGISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184
+; DAGISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188
+; DAGISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192
+; DAGISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196
+; DAGISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200
+; DAGISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204
+; DAGISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208
+; DAGISEL-NEXT:    scratch_store_b32 off, v69, s32 offset:212
+; DAGISEL-NEXT:    scratch_store_b32 off, v70, s32 offset:216
+; DAGISEL-NEXT:    scratch_store_b32 off, v71, s32 offset:220
+; DAGISEL-NEXT:    scratch_store_b32 off, v80, s32 offset:224
+; DAGISEL-NEXT:    scratch_store_b32 off, v81, s32 offset:228
+; DAGISEL-NEXT:    scratch_store_b32 off, v82, s32 offset:232
+; DAGISEL-NEXT:    scratch_store_b32 off, v83, s32 offset:236
+; DAGISEL-NEXT:    scratch_store_b32 off, v84, s32 offset:240
+; DAGISEL-NEXT:    scratch_store_b32 off, v85, s32 offset:244
+; DAGISEL-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; DAGISEL-NEXT:    scratch_store_b32 off, v87, s32 offset:252
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v96, s32 offset:256
+; DAGISEL-NEXT:    scratch_store_b32 off, v97, s32 offset:260
+; DAGISEL-NEXT:    scratch_store_b32 off, v98, s32 offset:264
+; DAGISEL-NEXT:    scratch_store_b32 off, v99, s32 offset:268
+; DAGISEL-NEXT:    scratch_store_b32 off, v100, s32 offset:272
+; DAGISEL-NEXT:    scratch_store_b32 off, v101, s32 offset:276
+; DAGISEL-NEXT:    scratch_store_b32 off, v102, s32 offset:280
+; DAGISEL-NEXT:    scratch_store_b32 off, v103, s32 offset:284
+; DAGISEL-NEXT:    scratch_store_b32 off, v112, s32 offset:288
+; DAGISEL-NEXT:    scratch_store_b32 off, v113, s32 offset:292
+; DAGISEL-NEXT:    scratch_store_b32 off, v114, s32 offset:296
+; DAGISEL-NEXT:    scratch_store_b32 off, v115, s32 offset:300
+; DAGISEL-NEXT:    scratch_store_b32 off, v116, s32 offset:304
+; DAGISEL-NEXT:    scratch_store_b32 off, v117, s32 offset:308
+; DAGISEL-NEXT:    scratch_store_b32 off, v118, s32 offset:312
+; DAGISEL-NEXT:    scratch_store_b32 off, v119, s32 offset:316
+; DAGISEL-NEXT:    scratch_store_b32 off, v128, s32 offset:320
+; DAGISEL-NEXT:    scratch_store_b32 off, v129, s32 offset:324
+; DAGISEL-NEXT:    scratch_store_b32 off, v130, s32 offset:328
+; DAGISEL-NEXT:    scratch_store_b32 off, v131, s32 offset:332
+; DAGISEL-NEXT:    scratch_store_b32 off, v132, s32 offset:336
+; DAGISEL-NEXT:    scratch_store_b32 off, v133, s32 offset:340
+; DAGISEL-NEXT:    scratch_store_b32 off, v134, s32 offset:344
+; DAGISEL-NEXT:    scratch_store_b32 off, v135, s32 offset:348
+; DAGISEL-NEXT:    scratch_store_b32 off, v144, s32 offset:352
+; DAGISEL-NEXT:    scratch_store_b32 off, v145, s32 offset:356
+; DAGISEL-NEXT:    scratch_store_b32 off, v146, s32 offset:360
+; DAGISEL-NEXT:    scratch_store_b32 off, v147, s32 offset:364
+; DAGISEL-NEXT:    scratch_store_b32 off, v148, s32 offset:368
+; DAGISEL-NEXT:    scratch_store_b32 off, v149, s32 offset:372
+; DAGISEL-NEXT:    scratch_store_b32 off, v150, s32 offset:376
+; DAGISEL-NEXT:    scratch_store_b32 off, v151, s32 offset:380
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v160, s32 offset:384
+; DAGISEL-NEXT:    scratch_store_b32 off, v161, s32 offset:388
+; DAGISEL-NEXT:    scratch_store_b32 off, v162, s32 offset:392
+; DAGISEL-NEXT:    scratch_store_b32 off, v163, s32 offset:396
+; DAGISEL-NEXT:    scratch_store_b32 off, v164, s32 offset:400
+; DAGISEL-NEXT:    scratch_store_b32 off, v165, s32 offset:404
+; DAGISEL-NEXT:    scratch_store_b32 off, v166, s32 offset:408
+; DAGISEL-NEXT:    scratch_store_b32 off, v167, s32 offset:412
+; DAGISEL-NEXT:    scratch_store_b32 off, v176, s32 offset:416
+; DAGISEL-NEXT:    scratch_store_b32 off, v177, s32 offset:420
+; DAGISEL-NEXT:    scratch_store_b32 off, v178, s32 offset:424
+; DAGISEL-NEXT:    scratch_store_b32 off, v179, s32 offset:428
+; DAGISEL-NEXT:    scratch_store_b32 off, v180, s32 offset:432
+; DAGISEL-NEXT:    scratch_store_b32 off, v181, s32 offset:436
+; DAGISEL-NEXT:    scratch_store_b32 off, v182, s32 offset:440
+; DAGISEL-NEXT:    scratch_store_b32 off, v183, s32 offset:444
+; DAGISEL-NEXT:    scratch_store_b32 off, v192, s32 offset:448
+; DAGISEL-NEXT:    scratch_store_b32 off, v193, s32 offset:452
+; DAGISEL-NEXT:    scratch_store_b32 off, v194, s32 offset:456
+; DAGISEL-NEXT:    scratch_store_b32 off, v195, s32 offset:460
+; DAGISEL-NEXT:    scratch_store_b32 off, v196, s32 offset:464
+; DAGISEL-NEXT:    scratch_store_b32 off, v197, s32 offset:468
+; DAGISEL-NEXT:    scratch_store_b32 off, v198, s32 offset:472
+; DAGISEL-NEXT:    scratch_store_b32 off, v199, s32 offset:476
+; DAGISEL-NEXT:    scratch_store_b32 off, v208, s32 offset:480
+; DAGISEL-NEXT:    scratch_store_b32 off, v209, s32 offset:484
+; DAGISEL-NEXT:    scratch_store_b32 off, v210, s32 offset:488
+; DAGISEL-NEXT:    scratch_store_b32 off, v211, s32 offset:492
+; DAGISEL-NEXT:    scratch_store_b32 off, v212, s32 offset:496
+; DAGISEL-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; DAGISEL-NEXT:    scratch_store_b32 off, v214, s32 offset:504
+; DAGISEL-NEXT:    scratch_store_b32 off, v215, s32 offset:508
+; DAGISEL-NEXT:    s_clause 0xf
+; DAGISEL-NEXT:    scratch_store_b32 off, v224, s32 offset:512
+; DAGISEL-NEXT:    scratch_store_b32 off, v225, s32 offset:516
+; DAGISEL-NEXT:    scratch_store_b32 off, v226, s32 offset:520
+; DAGISEL-NEXT:    scratch_store_b32 off, v227, s32 offset:524
+; DAGISEL-NEXT:    scratch_store_b32 off, v228, s32 offset:528
+; DAGISEL-NEXT:    scratch_store_b32 off, v229, s32 offset:532
+; DAGISEL-NEXT:    scratch_store_b32 off, v230, s32 offset:536
+; DAGISEL-NEXT:    scratch_store_b32 off, v231, s32 offset:540
+; DAGISEL-NEXT:    scratch_store_b32 off, v240, s32 offset:544
+; DAGISEL-NEXT:    scratch_store_b32 off, v241, s32 offset:548
+; DAGISEL-NEXT:    scratch_store_b32 off, v242, s32 offset:552
+; DAGISEL-NEXT:    scratch_store_b32 off, v243, s32 offset:556
+; DAGISEL-NEXT:    scratch_store_b32 off, v244, s32 offset:560
+; DAGISEL-NEXT:    scratch_store_b32 off, v245, s32 offset:564
+; DAGISEL-NEXT:    scratch_store_b32 off, v246, s32 offset:568
+; DAGISEL-NEXT:    scratch_store_b32 off, v247, s32 offset:572
+; DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-NEXT:    v_add_nc_u32_e32 v1, 13, v0
+; DAGISEL-NEXT:    s_mov_b32 s37, good_callee@abs32@hi
+; DAGISEL-NEXT:    s_mov_b32 s36, good_callee@abs32@lo
+; DAGISEL-NEXT:    s_wait_alu 0xfffe
+; DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; DAGISEL-NEXT:    scratch_load_b32 v6, off, s32 offset:24
+; DAGISEL-NEXT:    scratch_load_b32 v7, off, s32 offset:28
+; DAGISEL-NEXT:    scratch_load_b32 v8, off, s32 offset:32
+; DAGISEL-NEXT:    scratch_load_b32 v9, off, s32 offset:36
+; DAGISEL-NEXT:    scratch_load_b32 v10, off, s32 offset:40
+; DAGISEL-NEXT:    scratch_load_b32 v11, off, s32 offset:44
+; DAGISEL-NEXT:    scratch_load_b32 v12, off, s32 offset:48
+; DAGISEL-NEXT:    scratch_load_b32 v13, off, s32 offset:52
+; DAGISEL-NEXT:    scratch_load_b32 v14, off, s32 offset:56
+; DAGISEL-NEXT:    scratch_load_b32 v15, off, s32 offset:60
+; DAGISEL-NEXT:    scratch_load_b32 v16, off, s32 offset:64
+; DAGISEL-NEXT:    scratch_load_b32 v17, off, s32 offset:68
+; DAGISEL-NEXT:    scratch_load_b32 v18, off, s32 offset:72
+; DAGISEL-NEXT:    scratch_load_b32 v19, off, s32 offset:76
+; DAGISEL-NEXT:    scratch_load_b32 v20, off, s32 offset:80
+; DAGISEL-NEXT:    scratch_load_b32 v21, off, s32 offset:84
+; DAGISEL-NEXT:    scratch_load_b32 v22, off, s32 offset:88
+; DAGISEL-NEXT:    scratch_load_b32 v23, off, s32 offset:92
+; DAGISEL-NEXT:    scratch_load_b32 v24, off, s32 offset:96
+; DAGISEL-NEXT:    scratch_load_b32 v25, off, s32 offset:100
+; DAGISEL-NEXT:    scratch_load_b32 v26, off, s32 offset:104
+; DAGISEL-NEXT:    scratch_load_b32 v27, off, s32 offset:108
+; DAGISEL-NEXT:    scratch_load_b32 v28, off, s32 offset:112
+; DAGISEL-NEXT:    scratch_load_b32 v29, off, s32 offset:116
+; DAGISEL-NEXT:    scratch_load_b32 v30, off, s32 offset:120
+; DAGISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:124
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; DAGISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:132
+; DAGISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:136
+; DAGISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:140
+; DAGISEL-NEXT:    scratch_load_b32 v36, off, s32 offset:144
+; DAGISEL-NEXT:    scratch_load_b32 v37, off, s32 offset:148
+; DAGISEL-NEXT:    scratch_load_b32 v38, off, s32 offset:152
+; DAGISEL-NEXT:    scratch_load_b32 v39, off, s32 offset:156
+; DAGISEL-NEXT:    scratch_load_b32 v48, off, s32 offset:160
+; DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:164
+; DAGISEL-NEXT:    scratch_load_b32 v50, off, s32 offset:168
+; DAGISEL-NEXT:    scratch_load_b32 v51, off, s32 offset:172
+; DAGISEL-NEXT:    scratch_load_b32 v52, off, s32 offset:176
+; DAGISEL-NEXT:    scratch_load_b32 v53, off, s32 offset:180
+; DAGISEL-NEXT:    scratch_load_b32 v54, off, s32 offset:184
+; DAGISEL-NEXT:    scratch_load_b32 v55, off, s32 offset:188
+; DAGISEL-NEXT:    scratch_load_b32 v64, off, s32 offset:192
+; DAGISEL-NEXT:    scratch_load_b32 v65, off, s32 offset:196
+; DAGISEL-NEXT:    scratch_load_b32 v66, off, s32 offset:200
+; DAGISEL-NEXT:    scratch_load_b32 v67, off, s32 offset:204
+; DAGISEL-NEXT:    scratch_load_b32 v68, off, s32 offset:208
+; DAGISEL-NEXT:    scratch_load_b32 v69, off, s32 offset:212
+; DAGISEL-NEXT:    scratch_load_b32 v70, off, s32 offset:216
+; DAGISEL-NEXT:    scratch_load_b32 v71, off, s32 offset:220
+; DAGISEL-NEXT:    scratch_load_b32 v80, off, s32 offset:224
+; DAGISEL-NEXT:    scratch_load_b32 v81, off, s32 offset:228
+; DAGISEL-NEXT:    scratch_load_b32 v82, off, s32 offset:232
+; DAGISEL-NEXT:    scratch_load_b32 v83, off, s32 offset:236
+; DAGISEL-NEXT:    scratch_load_b32 v84, off, s32 offset:240
+; DAGISEL-NEXT:    scratch_load_b32 v85, off, s32 offset:244
+; DAGISEL-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; DAGISEL-NEXT:    scratch_load_b32 v87, off, s32 offset:252
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v96, off, s32 offset:256
+; DAGISEL-NEXT:    scratch_load_b32 v97, off, s32 offset:260
+; DAGISEL-NEXT:    scratch_load_b32 v98, off, s32 offset:264
+; DAGISEL-NEXT:    scratch_load_b32 v99, off, s32 offset:268
+; DAGISEL-NEXT:    scratch_load_b32 v100, off, s32 offset:272
+; DAGISEL-NEXT:    scratch_load_b32 v101, off, s32 offset:276
+; DAGISEL-NEXT:    scratch_load_b32 v102, off, s32 offset:280
+; DAGISEL-NEXT:    scratch_load_b32 v103, off, s32 offset:284
+; DAGISEL-NEXT:    scratch_load_b32 v112, off, s32 offset:288
+; DAGISEL-NEXT:    scratch_load_b32 v113, off, s32 offset:292
+; DAGISEL-NEXT:    scratch_load_b32 v114, off, s32 offset:296
+; DAGISEL-NEXT:    scratch_load_b32 v115, off, s32 offset:300
+; DAGISEL-NEXT:    scratch_load_b32 v116, off, s32 offset:304
+; DAGISEL-NEXT:    scratch_load_b32 v117, off, s32 offset:308
+; DAGISEL-NEXT:    scratch_load_b32 v118, off, s32 offset:312
+; DAGISEL-NEXT:    scratch_load_b32 v119, off, s32 offset:316
+; DAGISEL-NEXT:    scratch_load_b32 v128, off, s32 offset:320
+; DAGISEL-NEXT:    scratch_load_b32 v129, off, s32 offset:324
+; DAGISEL-NEXT:    scratch_load_b32 v130, off, s32 offset:328
+; DAGISEL-NEXT:    scratch_load_b32 v131, off, s32 offset:332
+; DAGISEL-NEXT:    scratch_load_b32 v132, off, s32 offset:336
+; DAGISEL-NEXT:    scratch_load_b32 v133, off, s32 offset:340
+; DAGISEL-NEXT:    scratch_load_b32 v134, off, s32 offset:344
+; DAGISEL-NEXT:    scratch_load_b32 v135, off, s32 offset:348
+; DAGISEL-NEXT:    scratch_load_b32 v144, off, s32 offset:352
+; DAGISEL-NEXT:    scratch_load_b32 v145, off, s32 offset:356
+; DAGISEL-NEXT:    scratch_load_b32 v146, off, s32 offset:360
+; DAGISEL-NEXT:    scratch_load_b32 v147, off, s32 offset:364
+; DAGISEL-NEXT:    scratch_load_b32 v148, off, s32 offset:368
+; DAGISEL-NEXT:    scratch_load_b32 v149, off, s32 offset:372
+; DAGISEL-NEXT:    scratch_load_b32 v150, off, s32 offset:376
+; DAGISEL-NEXT:    scratch_load_b32 v151, off, s32 offset:380
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v160, off, s32 offset:384
+; DAGISEL-NEXT:    scratch_load_b32 v161, off, s32 offset:388
+; DAGISEL-NEXT:    scratch_load_b32 v162, off, s32 offset:392
+; DAGISEL-NEXT:    scratch_load_b32 v163, off, s32 offset:396
+; DAGISEL-NEXT:    scratch_load_b32 v164, off, s32 offset:400
+; DAGISEL-NEXT:    scratch_load_b32 v165, off, s32 offset:404
+; DAGISEL-NEXT:    scratch_load_b32 v166, off, s32 offset:408
+; DAGISEL-NEXT:    scratch_load_b32 v167, off, s32 offset:412
+; DAGISEL-NEXT:    scratch_load_b32 v176, off, s32 offset:416
+; DAGISEL-NEXT:    scratch_load_b32 v177, off, s32 offset:420
+; DAGISEL-NEXT:    scratch_load_b32 v178, off, s32 offset:424
+; DAGISEL-NEXT:    scratch_load_b32 v179, off, s32 offset:428
+; DAGISEL-NEXT:    scratch_load_b32 v180, off, s32 offset:432
+; DAGISEL-NEXT:    scratch_load_b32 v181, off, s32 offset:436
+; DAGISEL-NEXT:    scratch_load_b32 v182, off, s32 offset:440
+; DAGISEL-NEXT:    scratch_load_b32 v183, off, s32 offset:444
+; DAGISEL-NEXT:    scratch_load_b32 v192, off, s32 offset:448
+; DAGISEL-NEXT:    scratch_load_b32 v193, off, s32 offset:452
+; DAGISEL-NEXT:    scratch_load_b32 v194, off, s32 offset:456
+; DAGISEL-NEXT:    scratch_load_b32 v195, off, s32 offset:460
+; DAGISEL-NEXT:    scratch_load_b32 v196, off, s32 offset:464
+; DAGISEL-NEXT:    scratch_load_b32 v197, off, s32 offset:468
+; DAGISEL-NEXT:    scratch_load_b32 v198, off, s32 offset:472
+; DAGISEL-NEXT:    scratch_load_b32 v199, off, s32 offset:476
+; DAGISEL-NEXT:    scratch_load_b32 v208, off, s32 offset:480
+; DAGISEL-NEXT:    scratch_load_b32 v209, off, s32 offset:484
+; DAGISEL-NEXT:    scratch_load_b32 v210, off, s32 offset:488
+; DAGISEL-NEXT:    scratch_load_b32 v211, off, s32 offset:492
+; DAGISEL-NEXT:    scratch_load_b32 v212, off, s32 offset:496
+; DAGISEL-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; DAGISEL-NEXT:    scratch_load_b32 v214, off, s32 offset:504
+; DAGISEL-NEXT:    scratch_load_b32 v215, off, s32 offset:508
+; DAGISEL-NEXT:    s_clause 0xf
+; DAGISEL-NEXT:    scratch_load_b32 v224, off, s32 offset:512
+; DAGISEL-NEXT:    scratch_load_b32 v225, off, s32 offset:516
+; DAGISEL-NEXT:    scratch_load_b32 v226, off, s32 offset:520
+; DAGISEL-NEXT:    scratch_load_b32 v227, off, s32 offset:524
+; DAGISEL-NEXT:    scratch_load_b32 v228, off, s32 offset:528
+; DAGISEL-NEXT:    scratch_load_b32 v229, off, s32 offset:532
+; DAGISEL-NEXT:    scratch_load_b32 v230, off, s32 offset:536
+; DAGISEL-NEXT:    scratch_load_b32 v231, off, s32 offset:540
+; DAGISEL-NEXT:    scratch_load_b32 v240, off, s32 offset:544
+; DAGISEL-NEXT:    scratch_load_b32 v241, off, s32 offset:548
+; DAGISEL-NEXT:    scratch_load_b32 v242, off, s32 offset:552
+; DAGISEL-NEXT:    scratch_load_b32 v243, off, s32 offset:556
+; DAGISEL-NEXT:    scratch_load_b32 v244, off, s32 offset:560
+; DAGISEL-NEXT:    scratch_load_b32 v245, off, s32 offset:564
+; DAGISEL-NEXT:    scratch_load_b32 v246, off, s32 offset:568
+; DAGISEL-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
+; DAGISEL-NEXT:    s_setpc_b64 s[36:37]
+;
+; GISEL-LABEL: tail_call_from_whole_wave:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-NEXT:    s_wait_expcnt 0x0
+; GISEL-NEXT:    s_wait_samplecnt 0x0
+; GISEL-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-NEXT:    s_wait_kmcnt 0x0
+; GISEL-NEXT:    s_xor_saveexec_b32 s0, -1
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v0, s32
+; GISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; GISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
+; GISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; GISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
+; GISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; GISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24
+; GISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28
+; GISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32
+; GISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36
+; GISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40
+; GISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44
+; GISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48
+; GISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52
+; GISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56
+; GISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60
+; GISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64
+; GISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68
+; GISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72
+; GISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76
+; GISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80
+; GISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84
+; GISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88
+; GISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92
+; GISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96
+; GISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100
+; GISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104
+; GISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108
+; GISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112
+; GISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116
+; GISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120
+; GISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128
+; GISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132
+; GISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136
+; GISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140
+; GISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144
+; GISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148
+; GISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152
+; GISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156
+; GISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160
+; GISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164
+; GISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168
+; GISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172
+; GISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176
+; GISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180
+; GISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184
+; GISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188
+; GISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192
+; GISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196
+; GISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200
+; GISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204
+; GISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208
+; GISEL-NEXT:    scratch_store_b32 off, v69, s32 offset:212
+; GISEL-NEXT:    scratch_store_b32 off, v70, s32 offset:216
+; GISEL-NEXT:    scratch_store_b32 off, v71, s32 offset:220
+; GISEL-NEXT:    scratch_store_b32 off, v80, s32 offset:224
+; GISEL-NEXT:    scratch_store_b32 off, v81, s32 offset:228
+; GISEL-NEXT:    scratch_store_b32 off, v82, s32 offset:232
+; GISEL-NEXT:    scratch_store_b32 off, v83, s32 offset:236
+; GISEL-NEXT:    scratch_store_b32 off, v84, s32 offset:240
+; GISEL-NEXT:    scratch_store_b32 off, v85, s32 offset:244
+; GISEL-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; GISEL-NEXT:    scratch_store_b32 off, v87, s32 offset:252
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v96, s32 offset:256
+; GISEL-NEXT:    scratch_store_b32 off, v97, s32 offset:260
+; GISEL-NEXT:    scratch_store_b32 off, v98, s32 offset:264
+; GISEL-NEXT:    scratch_store_b32 off, v99, s32 offset:268
+; GISEL-NEXT:    scratch_store_b32 off, v100, s32 offset:272
+; GISEL-NEXT:    scratch_store_b32 off, v101, s32 offset:276
+; GISEL-NEXT:    scratch_store_b32 off, v102, s32 offset:280
+; GISEL-NEXT:    scratch_store_b32 off, v103, s32 offset:284
+; GISEL-NEXT:    scratch_store_b32 off, v112, s32 offset:288
+; GISEL-NEXT:    scratch_store_b32 off, v113, s32 offset:292
+; GISEL-NEXT:    scratch_store_b32 off, v114, s32 offset:296
+; GISEL-NEXT:    scratch_store_b32 off, v115, s32 offset:300
+; GISEL-NEXT:    scratch_store_b32 off, v116, s32 offset:304
+; GISEL-NEXT:    scratch_store_b32 off, v117, s32 offset:308
+; GISEL-NEXT:    scratch_store_b32 off, v118, s32 offset:312
+; GISEL-NEXT:    scratch_store_b32 off, v119, s32 offset:316
+; GISEL-NEXT:    scratch_store_b32 off, v128, s32 offset:320
+; GISEL-NEXT:    scratch_store_b32 off, v129, s32 offset:324
+; GISEL-NEXT:    scratch_store_b32 off, v130, s32 offset:328
+; GISEL-NEXT:    scratch_store_b32 off, v131, s32 offset:332
+; GISEL-NEXT:    scratch_store_b32 off, v132, s32 offset:336
+; GISEL-NEXT:    scratch_store_b32 off, v133, s32 offset:340
+; GISEL-NEXT:    scratch_store_b32 off, v134, s32 offset:344
+; GISEL-NEXT:    scratch_store_b32 off, v135, s32 offset:348
+; GISEL-NEXT:    scratch_store_b32 off, v144, s32 offset:352
+; GISEL-NEXT:    scratch_store_b32 off, v145, s32 offset:356
+; GISEL-NEXT:    scratch_store_b32 off, v146, s32 offset:360
+; GISEL-NEXT:    scratch_store_b32 off, v147, s32 offset:364
+; GISEL-NEXT:    scratch_store_b32 off, v148, s32 offset:368
+; GISEL-NEXT:    scratch_store_b32 off, v149, s32 offset:372
+; GISEL-NEXT:    scratch_store_b32 off, v150, s32 offset:376
+; GISEL-NEXT:    scratch_store_b32 off, v151, s32 offset:380
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v160, s32 offset:384
+; GISEL-NEXT:    scratch_store_b32 off, v161, s32 offset:388
+; GISEL-NEXT:    scratch_store_b32 off, v162, s32 offset:392
+; GISEL-NEXT:    scratch_store_b32 off, v163, s32 offset:396
+; GISEL-NEXT:    scratch_store_b32 off, v164, s32 offset:400
+; GISEL-NEXT:    scratch_store_b32 off, v165, s32 offset:404
+; GISEL-NEXT:    scratch_store_b32 off, v166, s32 offset:408
+; GISEL-NEXT:    scratch_store_b32 off, v167, s32 offset:412
+; GISEL-NEXT:    scratch_store_b32 off, v176, s32 offset:416
+; GISEL-NEXT:    scratch_store_b32 off, v177, s32 offset:420
+; GISEL-NEXT:    scratch_store_b32 off, v178, s32 offset:424
+; GISEL-NEXT:    scratch_store_b32 off, v179, s32 offset:428
+; GISEL-NEXT:    scratch_store_b32 off, v180, s32 offset:432
+; GISEL-NEXT:    scratch_store_b32 off, v181, s32 offset:436
+; GISEL-NEXT:    scratch_store_b32 off, v182, s32 offset:440
+; GISEL-NEXT:    scratch_store_b32 off, v183, s32 offset:444
+; GISEL-NEXT:    scratch_store_b32 off, v192, s32 offset:448
+; GISEL-NEXT:    scratch_store_b32 off, v193, s32 offset:452
+; GISEL-NEXT:    scratch_store_b32 off, v194, s32 offset:456
+; GISEL-NEXT:    scratch_store_b32 off, v195, s32 offset:460
+; GISEL-NEXT:    scratch_store_b32 off, v196, s32 offset:464
+; GISEL-NEXT:    scratch_store_b32 off, v197, s32 offset:468
+; GISEL-NEXT:    scratch_store_b32 off, v198, s32 offset:472
+; GISEL-NEXT:    scratch_store_b32 off, v199, s32 offset:476
+; GISEL-NEXT:    scratch_store_b32 off, v208, s32 offset:480
+; GISEL-NEXT:    scratch_store_b32 off, v209, s32 offset:484
+; GISEL-NEXT:    scratch_store_b32 off, v210, s32 offset:488
+; GISEL-NEXT:    scratch_store_b32 off, v211, s32 offset:492
+; GISEL-NEXT:    scratch_store_b32 off, v212, s32 offset:496
+; GISEL-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; GISEL-NEXT:    scratch_store_b32 off, v214, s32 offset:504
+; GISEL-NEXT:    scratch_store_b32 off, v215, s32 offset:508
+; GISEL-NEXT:    s_clause 0xf
+; GISEL-NEXT:    scratch_store_b32 off, v224, s32 offset:512
+; GISEL-NEXT:    scratch_store_b32 off, v225, s32 offset:516
+; GISEL-NEXT:    scratch_store_b32 off, v226, s32 offset:520
+; GISEL-NEXT:    scratch_store_b32 off, v227, s32 offset:524
+; GISEL-NEXT:    scratch_store_b32 off, v228, s32 offset:528
+; GISEL-NEXT:    scratch_store_b32 off, v229, s32 offset:532
+; GISEL-NEXT:    scratch_store_b32 off, v230, s32 offset:536
+; GISEL-NEXT:    scratch_store_b32 off, v231, s32 offset:540
+; GISEL-NEXT:    scratch_store_b32 off, v240, s32 offset:544
+; GISEL-NEXT:    scratch_store_b32 off, v241, s32 offset:548
+; GISEL-NEXT:    scratch_store_b32 off, v242, s32 offset:552
+; GISEL-NEXT:    scratch_store_b32 off, v243, s32 offset:556
+; GISEL-NEXT:    scratch_store_b32 off, v244, s32 offset:560
+; GISEL-NEXT:    scratch_store_b32 off, v245, s32 offset:564
+; GISEL-NEXT:    scratch_store_b32 off, v246, s32 offset:568
+; GISEL-NEXT:    scratch_store_b32 off, v247, s32 offset:572
+; GISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-NEXT:    v_add_nc_u32_e32 v1, 13, v0
+; GISEL-NEXT:    s_mov_b32 s36, good_callee@abs32@lo
+; GISEL-NEXT:    s_mov_b32 s37, good_callee@abs32@hi
+; GISEL-NEXT:    s_wait_alu 0xfffe
+; GISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; GISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; GISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; GISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; GISEL-NEXT:    scratch_load_b32 v6, off, s32 offset:24
+; GISEL-NEXT:    scratch_load_b32 v7, off, s32 offset:28
+; GISEL-NEXT:    scratch_load_b32 v8, off, s32 offset:32
+; GISEL-NEXT:    scratch_load_b32 v9, off, s32 offset:36
+; GISEL-NEXT:    scratch_load_b32 v10, off, s32 offset:40
+; GISEL-NEXT:    scratch_load_b32 v11, off, s32 offset:44
+; GISEL-NEXT:    scratch_load_b32 v12, off, s32 offset:48
+; GISEL-NEXT:    scratch_load_b32 v13, off, s32 offset:52
+; GISEL-NEXT:    scratch_load_b32 v14, off, s32 offset:56
+; GISEL-NEXT:    scratch_load_b32 v15, off, s32 offset:60
+; GISEL-NEXT:    scratch_load_b32 v16, off, s32 offset:64
+; GISEL-NEXT:    scratch_load_b32 v17, off, s32 offset:68
+; GISEL-NEXT:    scratch_load_b32 v18, off, s32 offset:72
+; GISEL-NEXT:    scratch_load_b32 v19, off, s32 offset:76
+; GISEL-NEXT:    scratch_load_b32 v20, off, s32 offset:80
+; GISEL-NEXT:    scratch_load_b32 v21, off, s32 offset:84
+; GISEL-NEXT:    scratch_load_b32 v22, off, s32 offset:88
+; GISEL-NEXT:    scratch_load_b32 v23, off, s32 offset:92
+; GISEL-NEXT:    scratch_load_b32 v24, off, s32 offset:96
+; GISEL-NEXT:    scratch_load_b32 v25, off, s32 offset:100
+; GISEL-NEXT:    scratch_load_b32 v26, off, s32 offset:104
+; GISEL-NEXT:    scratch_load_b32 v27, off, s32 offset:108
+; GISEL-NEXT:    scratch_load_b32 v28, off, s32 offset:112
+; GISEL-NEXT:    scratch_load_b32 v29, off, s32 offset:116
+; GISEL-NEXT:    scratch_load_b32 v30, off, s32 offset:120
+; GISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:124
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; GISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:132
+; GISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:136
+; GISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:140
+; GISEL-NEXT:    scratch_load_b32 v36, off, s32 offset:144
+; GISEL-NEXT:    scratch_load_b32 v37, off, s32 offset:148
+; GISEL-NEXT:    scratch_load_b32 v38, off, s32 offset:152
+; GISEL-NEXT:    scratch_load_b32 v39, off, s32 offset:156
+; GISEL-NEXT:    scratch_load_b32 v48, off, s32 offset:160
+; GISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:164
+; GISEL-NEXT:    scratch_load_b32 v50, off, s32 offset:168
+; GISEL-NEXT:    scratch_load_b32 v51, off, s32 offset:172
+; GISEL-NEXT:    scratch_load_b32 v52, off, s32 offset:176
+; GISEL-NEXT:    scratch_load_b32 v53, off, s32 offset:180
+; GISEL-NEXT:    scratch_load_b32 v54, off, s32 offset:184
+; GISEL-NEXT:    scratch_load_b32 v55, off, s32 offset:188
+; GISEL-NEXT:    scratch_load_b32 v64, off, s32 offset:192
+; GISEL-NEXT:    scratch_load_b32 v65, off, s32 offset:196
+; GISEL-NEXT:    scratch_load_b32 v66, off, s32 offset:200
+; GISEL-NEXT:    scratch_load_b32 v67, off, s32 offset:204
+; GISEL-NEXT:    scratch_load_b32 v68, off, s32 offset:208
+; GISEL-NEXT:    scratch_load_b32 v69, off, s32 offset:212
+; GISEL-NEXT:    scratch_load_b32 v70, off, s32 offset:216
+; GISEL-NEXT:    scratch_load_b32 v71, off, s32 offset:220
+; GISEL-NEXT:    scratch_load_b32 v80, off, s32 offset:224
+; GISEL-NEXT:    scratch_load_b32 v81, off, s32 offset:228
+; GISEL-NEXT:    scratch_load_b32 v82, off, s32 offset:232
+; GISEL-NEXT:    scratch_load_b32 v83, off, s32 offset:236
+; GISEL-NEXT:    scratch_load_b32 v84, off, s32 offset:240
+; GISEL-NEXT:    scratch_load_b32 v85, off, s32 offset:244
+; GISEL-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; GISEL-NEXT:    scratch_load_b32 v87, off, s32 offset:252
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v96, off, s32 offset:256
+; GISEL-NEXT:    scratch_load_b32 v97, off, s32 offset:260
+; GISEL-NEXT:    scratch_load_b32 v98, off, s32 offset:264
+; GISEL-NEXT:    scratch_load_b32 v99, off, s32 offset:268
+; GISEL-NEXT:    scratch_load_b32 v100, off, s32 offset:272
+; GISEL-NEXT:    scratch_load_b32 v101, off, s32 offset:276
+; GISEL-NEXT:    scratch_load_b32 v102, off, s32 offset:280
+; GISEL-NEXT:    scratch_load_b32 v103, off, s32 offset:284
+; GISEL-NEXT:    scratch_load_b32 v112, off, s32 offset:288
+; GISEL-NEXT:    scratch_load_b32 v113, off, s32 offset:292
+; GISEL-NEXT:    scratch_load_b32 v114, off, s32 offset:296
+; GISEL-NEXT:    scratch_load_b32 v115, off, s32 offset:300
+; GISEL-NEXT:    scratch_load_b32 v116, off, s32 offset:304
+; GISEL-NEXT:    scratch_load_b32 v117, off, s32 offset:308
+; GISEL-NEXT:    scratch_load_b32 v118, off, s32 offset:312
+; GISEL-NEXT:    scratch_load_b32 v119, off, s32 offset:316
+; GISEL-NEXT:    scratch_load_b32 v128, off, s32 offset:320
+; GISEL-NEXT:    scratch_load_b32 v129, off, s32 offset:324
+; GISEL-NEXT:    scratch_load_b32 v130, off, s32 offset:328
+; GISEL-NEXT:    scratch_load_b32 v131, off, s32 offset:332
+; GISEL-NEXT:    scratch_load_b32 v132, off, s32 offset:336
+; GISEL-NEXT:    scratch_load_b32 v133, off, s32 offset:340
+; GISEL-NEXT:    scratch_load_b32 v134, off, s32 offset:344
+; GISEL-NEXT:    scratch_load_b32 v135, off, s32 offset:348
+; GISEL-NEXT:    scratch_load_b32 v144, off, s32 offset:352
+; GISEL-NEXT:    scratch_load_b32 v145, off, s32 offset:356
+; GISEL-NEXT:    scratch_load_b32 v146, off, s32 offset:360
+; GISEL-NEXT:    scratch_load_b32 v147, off, s32 offset:364
+; GISEL-NEXT:    scratch_load_b32 v148, off, s32 offset:368
+; GISEL-NEXT:    scratch_load_b32 v149, off, s32 offset:372
+; GISEL-NEXT:    scratch_load_b32 v150, off, s32 offset:376
+; GISEL-NEXT:    scratch_load_b32 v151, off, s32 offset:380
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v160, off, s32 offset:384
+; GISEL-NEXT:    scratch_load_b32 v161, off, s32 offset:388
+; GISEL-NEXT:    scratch_load_b32 v162, off, s32 offset:392
+; GISEL-NEXT:    scratch_load_b32 v163, off, s32 offset:396
+; GISEL-NEXT:    scratch_load_b32 v164, off, s32 offset:400
+; GISEL-NEXT:    scratch_load_b32 v165, off, s32 offset:404
+; GISEL-NEXT:    scratch_load_b32 v166, off, s32 offset:408
+; GISEL-NEXT:    scratch_load_b32 v167, off, s32 offset:412
+; GISEL-NEXT:    scratch_load_b32 v176, off, s32 offset:416
+; GISEL-NEXT:    scratch_load_b32 v177, off, s32 offset:420
+; GISEL-NEXT:    scratch_load_b32 v178, off, s32 offset:424
+; GISEL-NEXT:    scratch_load_b32 v179, off, s32 offset:428
+; GISEL-NEXT:    scratch_load_b32 v180, off, s32 offset:432
+; GISEL-NEXT:    scratch_load_b32 v181, off, s32 offset:436
+; GISEL-NEXT:    scratch_load_b32 v182, off, s32 offset:440
+; GISEL-NEXT:    scratch_load_b32 v183, off, s32 offset:444
+; GISEL-NEXT:    scratch_load_b32 v192, off, s32 offset:448
+; GISEL-NEXT:    scratch_load_b32 v193, off, s32 offset:452
+; GISEL-NEXT:    scratch_load_b32 v194, off, s32 offset:456
+; GISEL-NEXT:    scratch_load_b32 v195, off, s32 offset:460
+; GISEL-NEXT:    scratch_load_b32 v196, off, s32 offset:464
+; GISEL-NEXT:    scratch_load_b32 v197, off, s32 offset:468
+; GISEL-NEXT:    scratch_load_b32 v198, off, s32 offset:472
+; GISEL-NEXT:    scratch_load_b32 v199, off, s32 offset:476
+; GISEL-NEXT:    scratch_load_b32 v208, off, s32 offset:480
+; GISEL-NEXT:    scratch_load_b32 v209, off, s32 offset:484
+; GISEL-NEXT:    scratch_load_b32 v210, off, s32 offset:488
+; GISEL-NEXT:    scratch_load_b32 v211, off, s32 offset:492
+; GISEL-NEXT:    scratch_load_b32 v212, off, s32 offset:496
+; GISEL-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; GISEL-NEXT:    scratch_load_b32 v214, off, s32 offset:504
+; GISEL-NEXT:    scratch_load_b32 v215, off, s32 offset:508
+; GISEL-NEXT:    s_clause 0xf
+; GISEL-NEXT:    scratch_load_b32 v224, off, s32 offset:512
+; GISEL-NEXT:    scratch_load_b32 v225, off, s32 offset:516
+; GISEL-NEXT:    scratch_load_b32 v226, off, s32 offset:520
+; GISEL-NEXT:    scratch_load_b32 v227, off, s32 offset:524
+; GISEL-NEXT:    scratch_load_b32 v228, off, s32 offset:528
+; GISEL-NEXT:    scratch_load_b32 v229, off, s32 offset:532
+; GISEL-NEXT:    scratch_load_b32 v230, off, s32 offset:536
+; GISEL-NEXT:    scratch_load_b32 v231, off, s32 offset:540
+; GISEL-NEXT:    scratch_load_b32 v240, off, s32 offset:544
+; GISEL-NEXT:    scratch_load_b32 v241, off, s32 offset:548
+; GISEL-NEXT:    scratch_load_b32 v242, off, s32 offset:552
+; GISEL-NEXT:    scratch_load_b32 v243, off, s32 offset:556
+; GISEL-NEXT:    scratch_load_b32 v244, off, s32 offset:560
+; GISEL-NEXT:    scratch_load_b32 v245, off, s32 offset:564
+; GISEL-NEXT:    scratch_load_b32 v246, off, s32 offset:568
+; GISEL-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; GISEL-NEXT:    s_mov_b32 exec_lo, s0
+; GISEL-NEXT:    s_setpc_b64 s[36:37]
+  %y = add i32 %x, 13
+  %ret = tail call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c)
+  ret i32 %ret
+}
+
 declare amdgpu_gfx_whole_wave void @void_callee(i1 %active, i32 %x)
 
 define amdgpu_gfx void @ret_void(i32 %x) {
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
index 93f489170cea0..adba762235d8c 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
@@ -33,7 +33,7 @@ body:             |
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
     ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
+    ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 14, implicit $exec
     SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
@@ -69,7 +69,7 @@ body:             |
     ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec
     ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0
+    ; CHECK-NEXT: SI_RETURN
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     $vgpr40 = V_MOV_B32_e32 14, implicit $exec
     SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0
@@ -110,7 +110,7 @@ body:             |
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
     ; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo
+    ; CHECK-NEXT: SI_RETURN
     $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
     renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     $sgpr20 = S_MOV_B32 14, implicit $exec
@@ -151,7 +151,7 @@ body:             |
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0
     ; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo
+    ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
     renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     $sgpr20 = S_MOV_B32 14, implicit $exec
@@ -207,7 +207,7 @@ body:             |
     ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo
+    ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
     renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
@@ -265,7 +265,7 @@ body:             |
     ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr3
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr3
+    ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
     renamable $vcc_lo = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
@@ -322,7 +322,7 @@ body:             |
     ; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
     ; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
+    ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     $vgpr0 = V_MOV_B32_e32 14, implicit $exec
     S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42
@@ -363,7 +363,7 @@ body:             |
     ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; CHECK-NEXT: S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
-    ; CHECK-NEXT: SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
+    ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
     S_NOP 0, implicit $vgpr0, implicit $vgpr20, implicit $vgpr40
     SI_WHOLE_WAVE_FUNC_RETURN killed renamable $sgpr0, implicit killed $vgpr0
@@ -422,7 +422,7 @@ body:             |
   ; CHECK-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 $vcc_lo
-  ; CHECK-NEXT:   SI_WHOLE_WAVE_FUNC_RETURN killed renamable $vcc_lo, implicit $vgpr0
+  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
   bb.0:
     successors: %bb.1, %bb.2
     liveins: $vgpr0, $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index 3c8478c5a885b..c478f6f3c8dfb 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -2413,6 +2413,1681 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
   ret <2 x half> %ret
 }
 
+define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %active, <2 x half> %x, <2 x half> %y) {
+  ; This should not be turned into a tail call.
+; DAGISEL-LABEL: tail_call_gfx_from_whole_wave:
+; DAGISEL:       ; %bb.0:
+; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; DAGISEL-NEXT:    s_wait_expcnt 0x0
+; DAGISEL-NEXT:    s_wait_samplecnt 0x0
+; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
+; DAGISEL-NEXT:    s_wait_kmcnt 0x0
+; DAGISEL-NEXT:    s_mov_b32 s35, s33
+; DAGISEL-NEXT:    s_mov_b32 s33, s32
+; DAGISEL-NEXT:    s_xor_saveexec_b32 s34, -1
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:8
+; DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:12
+; DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:16
+; DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:20
+; DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:24
+; DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:28
+; DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:32
+; DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:36
+; DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:40
+; DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:44
+; DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:48
+; DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:52
+; DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:56
+; DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:60
+; DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:64
+; DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:68
+; DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:72
+; DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:76
+; DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:80
+; DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:84
+; DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:88
+; DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:92
+; DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:96
+; DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:100
+; DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:104
+; DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:108
+; DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:112
+; DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:116
+; DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:120
+; DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:124
+; DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:128
+; DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:132
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:136
+; DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:140
+; DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:144
+; DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:148
+; DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:152
+; DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:156
+; DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:160
+; DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:164
+; DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:168
+; DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:172
+; DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:176
+; DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:180
+; DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:184
+; DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:188
+; DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:192
+; DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:196
+; DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:200
+; DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:204
+; DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:208
+; DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:212
+; DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:216
+; DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:220
+; DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:224
+; DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:228
+; DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:232
+; DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:236
+; DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:240
+; DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:244
+; DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:248
+; DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:252
+; DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:256
+; DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:260
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:264
+; DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:268
+; DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:272
+; DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:276
+; DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:280
+; DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:284
+; DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:288
+; DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:292
+; DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:296
+; DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:300
+; DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:304
+; DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:308
+; DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:312
+; DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:316
+; DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:320
+; DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:324
+; DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:328
+; DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:332
+; DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:336
+; DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:340
+; DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:344
+; DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:348
+; DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:352
+; DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:356
+; DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:360
+; DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:364
+; DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:368
+; DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:372
+; DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:376
+; DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:380
+; DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:384
+; DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:388
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:392
+; DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:396
+; DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:400
+; DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:404
+; DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:408
+; DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:412
+; DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:416
+; DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:420
+; DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:424
+; DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:428
+; DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:432
+; DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:436
+; DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:440
+; DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:444
+; DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:448
+; DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:452
+; DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:456
+; DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:460
+; DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:464
+; DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:468
+; DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:472
+; DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:476
+; DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:480
+; DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:484
+; DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:488
+; DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:492
+; DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:496
+; DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:500
+; DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:504
+; DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:508
+; DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:512
+; DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:516
+; DAGISEL-NEXT:    s_clause 0xf
+; DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:520
+; DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:524
+; DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:528
+; DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:532
+; DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:536
+; DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:540
+; DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:544
+; DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:548
+; DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:552
+; DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:556
+; DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:560
+; DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:564
+; DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:568
+; DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:572
+; DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:576
+; DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:580
+; DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
+; DAGISEL-NEXT:    s_clause 0x1
+; DAGISEL-NEXT:    scratch_store_b32 off, v40, s33
+; DAGISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:4
+; DAGISEL-NEXT:    v_writelane_b32 v40, s4, 0
+; DAGISEL-NEXT:    v_writelane_b32 v41, s76, 0
+; DAGISEL-NEXT:    v_mov_b32_e32 v2, v0
+; DAGISEL-NEXT:    v_swap_b32 v0, v1
+; DAGISEL-NEXT:    v_writelane_b32 v40, s5, 1
+; DAGISEL-NEXT:    v_writelane_b32 v41, s77, 1
+; DAGISEL-NEXT:    s_mov_b32 s1, gfx_callee at abs32@hi
+; DAGISEL-NEXT:    s_mov_b32 s0, gfx_callee at abs32@lo
+; DAGISEL-NEXT:    s_addk_co_i32 s32, 0x250
+; DAGISEL-NEXT:    v_writelane_b32 v40, s6, 2
+; DAGISEL-NEXT:    v_writelane_b32 v41, s78, 2
+; DAGISEL-NEXT:    v_writelane_b32 v40, s7, 3
+; DAGISEL-NEXT:    v_writelane_b32 v41, s79, 3
+; DAGISEL-NEXT:    v_writelane_b32 v40, s8, 4
+; DAGISEL-NEXT:    v_writelane_b32 v41, s88, 4
+; DAGISEL-NEXT:    v_writelane_b32 v40, s9, 5
+; DAGISEL-NEXT:    v_writelane_b32 v41, s89, 5
+; DAGISEL-NEXT:    s_mov_b64 s[8:9], 0
+; DAGISEL-NEXT:    v_writelane_b32 v40, s10, 6
+; DAGISEL-NEXT:    v_writelane_b32 v41, s90, 6
+; DAGISEL-NEXT:    v_writelane_b32 v40, s11, 7
+; DAGISEL-NEXT:    v_writelane_b32 v41, s91, 7
+; DAGISEL-NEXT:    v_writelane_b32 v40, s12, 8
+; DAGISEL-NEXT:    v_writelane_b32 v41, s92, 8
+; DAGISEL-NEXT:    v_writelane_b32 v40, s13, 9
+; DAGISEL-NEXT:    v_writelane_b32 v41, s93, 9
+; DAGISEL-NEXT:    v_writelane_b32 v40, s14, 10
+; DAGISEL-NEXT:    v_writelane_b32 v41, s94, 10
+; DAGISEL-NEXT:    v_writelane_b32 v40, s15, 11
+; DAGISEL-NEXT:    v_writelane_b32 v41, s95, 11
+; DAGISEL-NEXT:    v_writelane_b32 v40, s16, 12
+; DAGISEL-NEXT:    v_writelane_b32 v40, s17, 13
+; DAGISEL-NEXT:    v_writelane_b32 v40, s18, 14
+; DAGISEL-NEXT:    v_writelane_b32 v40, s19, 15
+; DAGISEL-NEXT:    v_writelane_b32 v40, s20, 16
+; DAGISEL-NEXT:    v_writelane_b32 v40, s21, 17
+; DAGISEL-NEXT:    v_writelane_b32 v40, s22, 18
+; DAGISEL-NEXT:    v_writelane_b32 v40, s23, 19
+; DAGISEL-NEXT:    v_writelane_b32 v40, s24, 20
+; DAGISEL-NEXT:    v_writelane_b32 v40, s25, 21
+; DAGISEL-NEXT:    v_writelane_b32 v40, s26, 22
+; DAGISEL-NEXT:    v_writelane_b32 v40, s27, 23
+; DAGISEL-NEXT:    v_writelane_b32 v40, s28, 24
+; DAGISEL-NEXT:    v_writelane_b32 v40, s29, 25
+; DAGISEL-NEXT:    v_writelane_b32 v40, s30, 26
+; DAGISEL-NEXT:    v_writelane_b32 v40, s31, 27
+; DAGISEL-NEXT:    v_writelane_b32 v40, s72, 28
+; DAGISEL-NEXT:    v_writelane_b32 v40, s73, 29
+; DAGISEL-NEXT:    v_writelane_b32 v40, s74, 30
+; DAGISEL-NEXT:    v_writelane_b32 v40, s75, 31
+; DAGISEL-NEXT:    s_wait_alu 0xfffe
+; DAGISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; DAGISEL-NEXT:    v_readlane_b32 s95, v41, 11
+; DAGISEL-NEXT:    v_readlane_b32 s94, v41, 10
+; DAGISEL-NEXT:    v_readlane_b32 s93, v41, 9
+; DAGISEL-NEXT:    v_readlane_b32 s92, v41, 8
+; DAGISEL-NEXT:    v_readlane_b32 s91, v41, 7
+; DAGISEL-NEXT:    v_readlane_b32 s90, v41, 6
+; DAGISEL-NEXT:    v_readlane_b32 s89, v41, 5
+; DAGISEL-NEXT:    v_readlane_b32 s88, v41, 4
+; DAGISEL-NEXT:    v_readlane_b32 s79, v41, 3
+; DAGISEL-NEXT:    v_readlane_b32 s78, v41, 2
+; DAGISEL-NEXT:    v_readlane_b32 s77, v41, 1
+; DAGISEL-NEXT:    v_readlane_b32 s76, v41, 0
+; DAGISEL-NEXT:    v_readlane_b32 s75, v40, 31
+; DAGISEL-NEXT:    v_readlane_b32 s74, v40, 30
+; DAGISEL-NEXT:    v_readlane_b32 s73, v40, 29
+; DAGISEL-NEXT:    v_readlane_b32 s72, v40, 28
+; DAGISEL-NEXT:    v_readlane_b32 s31, v40, 27
+; DAGISEL-NEXT:    v_readlane_b32 s30, v40, 26
+; DAGISEL-NEXT:    v_readlane_b32 s29, v40, 25
+; DAGISEL-NEXT:    v_readlane_b32 s28, v40, 24
+; DAGISEL-NEXT:    v_readlane_b32 s27, v40, 23
+; DAGISEL-NEXT:    v_readlane_b32 s26, v40, 22
+; DAGISEL-NEXT:    v_readlane_b32 s25, v40, 21
+; DAGISEL-NEXT:    v_readlane_b32 s24, v40, 20
+; DAGISEL-NEXT:    v_readlane_b32 s23, v40, 19
+; DAGISEL-NEXT:    v_readlane_b32 s22, v40, 18
+; DAGISEL-NEXT:    v_readlane_b32 s21, v40, 17
+; DAGISEL-NEXT:    v_readlane_b32 s20, v40, 16
+; DAGISEL-NEXT:    v_readlane_b32 s19, v40, 15
+; DAGISEL-NEXT:    v_readlane_b32 s18, v40, 14
+; DAGISEL-NEXT:    v_readlane_b32 s17, v40, 13
+; DAGISEL-NEXT:    v_readlane_b32 s16, v40, 12
+; DAGISEL-NEXT:    v_readlane_b32 s15, v40, 11
+; DAGISEL-NEXT:    v_readlane_b32 s14, v40, 10
+; DAGISEL-NEXT:    v_readlane_b32 s13, v40, 9
+; DAGISEL-NEXT:    v_readlane_b32 s12, v40, 8
+; DAGISEL-NEXT:    v_readlane_b32 s11, v40, 7
+; DAGISEL-NEXT:    v_readlane_b32 s10, v40, 6
+; DAGISEL-NEXT:    v_readlane_b32 s9, v40, 5
+; DAGISEL-NEXT:    v_readlane_b32 s8, v40, 4
+; DAGISEL-NEXT:    v_readlane_b32 s7, v40, 3
+; DAGISEL-NEXT:    v_readlane_b32 s6, v40, 2
+; DAGISEL-NEXT:    v_readlane_b32 s5, v40, 1
+; DAGISEL-NEXT:    v_readlane_b32 s4, v40, 0
+; DAGISEL-NEXT:    s_clause 0x1
+; DAGISEL-NEXT:    scratch_load_b32 v40, off, s33
+; DAGISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:4
+; DAGISEL-NEXT:    s_mov_b32 s32, s33
+; DAGISEL-NEXT:    s_xor_b32 exec_lo, s34, -1
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:8
+; DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:12
+; DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:16
+; DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:20
+; DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:24
+; DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:28
+; DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:32
+; DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:36
+; DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:40
+; DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:44
+; DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:48
+; DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:52
+; DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:56
+; DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:60
+; DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:64
+; DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:68
+; DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:72
+; DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:76
+; DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:80
+; DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:84
+; DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:88
+; DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:92
+; DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:96
+; DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:100
+; DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:104
+; DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:108
+; DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:112
+; DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:116
+; DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:120
+; DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:124
+; DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:128
+; DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:132
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:136
+; DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:140
+; DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:144
+; DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:148
+; DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:152
+; DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:156
+; DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:160
+; DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:164
+; DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:168
+; DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:172
+; DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:176
+; DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:180
+; DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:184
+; DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:188
+; DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:192
+; DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:196
+; DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:200
+; DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:204
+; DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:208
+; DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:212
+; DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:216
+; DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:220
+; DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:224
+; DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:228
+; DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:232
+; DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:236
+; DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:240
+; DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:244
+; DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:248
+; DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:252
+; DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:256
+; DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:260
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:264
+; DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:268
+; DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:272
+; DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:276
+; DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:280
+; DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:284
+; DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:288
+; DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:292
+; DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:296
+; DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:300
+; DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:304
+; DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:308
+; DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:312
+; DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:316
+; DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:320
+; DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:324
+; DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:328
+; DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:332
+; DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:336
+; DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:340
+; DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:344
+; DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:348
+; DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:352
+; DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:356
+; DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:360
+; DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:364
+; DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:368
+; DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:372
+; DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:376
+; DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:380
+; DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:384
+; DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:388
+; DAGISEL-NEXT:    s_clause 0x1f
+; DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:392
+; DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:396
+; DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:400
+; DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:404
+; DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:408
+; DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:412
+; DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:416
+; DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:420
+; DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:424
+; DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:428
+; DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:432
+; DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:436
+; DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:440
+; DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:444
+; DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:448
+; DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:452
+; DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:456
+; DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:460
+; DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:464
+; DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:468
+; DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:472
+; DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:476
+; DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:480
+; DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:484
+; DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:488
+; DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:492
+; DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:496
+; DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:500
+; DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:504
+; DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:508
+; DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:512
+; DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:516
+; DAGISEL-NEXT:    s_clause 0xf
+; DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:520
+; DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:524
+; DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:528
+; DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:532
+; DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:536
+; DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:540
+; DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:544
+; DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:548
+; DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:552
+; DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:556
+; DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:560
+; DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:564
+; DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:568
+; DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:572
+; DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:576
+; DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:580
+; DAGISEL-NEXT:    s_mov_b32 exec_lo, s34
+; DAGISEL-NEXT:    s_mov_b32 s33, s35
+; DAGISEL-NEXT:    s_wait_loadcnt 0x0
+; DAGISEL-NEXT:    s_wait_alu 0xfffe
+; DAGISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: tail_call_gfx_from_whole_wave:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-NEXT:    s_wait_expcnt 0x0
+; GISEL-NEXT:    s_wait_samplecnt 0x0
+; GISEL-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-NEXT:    s_wait_kmcnt 0x0
+; GISEL-NEXT:    s_mov_b32 s35, s33
+; GISEL-NEXT:    s_mov_b32 s33, s32
+; GISEL-NEXT:    s_xor_saveexec_b32 s34, -1
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:8
+; GISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:12
+; GISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:16
+; GISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:20
+; GISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:24
+; GISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:28
+; GISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:32
+; GISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:36
+; GISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:40
+; GISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:44
+; GISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:48
+; GISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:52
+; GISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:56
+; GISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:60
+; GISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:64
+; GISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:68
+; GISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:72
+; GISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:76
+; GISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:80
+; GISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:84
+; GISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:88
+; GISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:92
+; GISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:96
+; GISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:100
+; GISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:104
+; GISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:108
+; GISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:112
+; GISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:116
+; GISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:120
+; GISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:124
+; GISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:128
+; GISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:132
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:136
+; GISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:140
+; GISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:144
+; GISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:148
+; GISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:152
+; GISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:156
+; GISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:160
+; GISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:164
+; GISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:168
+; GISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:172
+; GISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:176
+; GISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:180
+; GISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:184
+; GISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:188
+; GISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:192
+; GISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:196
+; GISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:200
+; GISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:204
+; GISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:208
+; GISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:212
+; GISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:216
+; GISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:220
+; GISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:224
+; GISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:228
+; GISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:232
+; GISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:236
+; GISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:240
+; GISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:244
+; GISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:248
+; GISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:252
+; GISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:256
+; GISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:260
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:264
+; GISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:268
+; GISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:272
+; GISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:276
+; GISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:280
+; GISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:284
+; GISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:288
+; GISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:292
+; GISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:296
+; GISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:300
+; GISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:304
+; GISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:308
+; GISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:312
+; GISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:316
+; GISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:320
+; GISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:324
+; GISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:328
+; GISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:332
+; GISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:336
+; GISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:340
+; GISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:344
+; GISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:348
+; GISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:352
+; GISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:356
+; GISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:360
+; GISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:364
+; GISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:368
+; GISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:372
+; GISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:376
+; GISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:380
+; GISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:384
+; GISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:388
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:392
+; GISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:396
+; GISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:400
+; GISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:404
+; GISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:408
+; GISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:412
+; GISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:416
+; GISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:420
+; GISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:424
+; GISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:428
+; GISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:432
+; GISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:436
+; GISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:440
+; GISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:444
+; GISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:448
+; GISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:452
+; GISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:456
+; GISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:460
+; GISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:464
+; GISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:468
+; GISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:472
+; GISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:476
+; GISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:480
+; GISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:484
+; GISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:488
+; GISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:492
+; GISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:496
+; GISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:500
+; GISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:504
+; GISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:508
+; GISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:512
+; GISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:516
+; GISEL-NEXT:    s_clause 0xf
+; GISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:520
+; GISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:524
+; GISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:528
+; GISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:532
+; GISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:536
+; GISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:540
+; GISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:544
+; GISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:548
+; GISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:552
+; GISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:556
+; GISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:560
+; GISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:564
+; GISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:568
+; GISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:572
+; GISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:576
+; GISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:580
+; GISEL-NEXT:    s_mov_b32 exec_lo, -1
+; GISEL-NEXT:    s_clause 0x1
+; GISEL-NEXT:    scratch_store_b32 off, v40, s33
+; GISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:4
+; GISEL-NEXT:    v_writelane_b32 v40, s4, 0
+; GISEL-NEXT:    v_writelane_b32 v41, s76, 0
+; GISEL-NEXT:    v_mov_b32_e32 v2, v0
+; GISEL-NEXT:    v_swap_b32 v0, v1
+; GISEL-NEXT:    v_writelane_b32 v40, s5, 1
+; GISEL-NEXT:    v_writelane_b32 v41, s77, 1
+; GISEL-NEXT:    s_mov_b32 s0, gfx_callee@abs32@lo
+; GISEL-NEXT:    s_mov_b32 s1, gfx_callee@abs32@hi
+; GISEL-NEXT:    s_addk_co_i32 s32, 0x250
+; GISEL-NEXT:    v_writelane_b32 v40, s6, 2
+; GISEL-NEXT:    v_writelane_b32 v41, s78, 2
+; GISEL-NEXT:    v_writelane_b32 v40, s7, 3
+; GISEL-NEXT:    v_writelane_b32 v41, s79, 3
+; GISEL-NEXT:    v_writelane_b32 v40, s8, 4
+; GISEL-NEXT:    v_writelane_b32 v41, s88, 4
+; GISEL-NEXT:    v_writelane_b32 v40, s9, 5
+; GISEL-NEXT:    v_writelane_b32 v41, s89, 5
+; GISEL-NEXT:    s_mov_b64 s[8:9], 0
+; GISEL-NEXT:    v_writelane_b32 v40, s10, 6
+; GISEL-NEXT:    v_writelane_b32 v41, s90, 6
+; GISEL-NEXT:    v_writelane_b32 v40, s11, 7
+; GISEL-NEXT:    v_writelane_b32 v41, s91, 7
+; GISEL-NEXT:    v_writelane_b32 v40, s12, 8
+; GISEL-NEXT:    v_writelane_b32 v41, s92, 8
+; GISEL-NEXT:    v_writelane_b32 v40, s13, 9
+; GISEL-NEXT:    v_writelane_b32 v41, s93, 9
+; GISEL-NEXT:    v_writelane_b32 v40, s14, 10
+; GISEL-NEXT:    v_writelane_b32 v41, s94, 10
+; GISEL-NEXT:    v_writelane_b32 v40, s15, 11
+; GISEL-NEXT:    v_writelane_b32 v41, s95, 11
+; GISEL-NEXT:    v_writelane_b32 v40, s16, 12
+; GISEL-NEXT:    v_writelane_b32 v40, s17, 13
+; GISEL-NEXT:    v_writelane_b32 v40, s18, 14
+; GISEL-NEXT:    v_writelane_b32 v40, s19, 15
+; GISEL-NEXT:    v_writelane_b32 v40, s20, 16
+; GISEL-NEXT:    v_writelane_b32 v40, s21, 17
+; GISEL-NEXT:    v_writelane_b32 v40, s22, 18
+; GISEL-NEXT:    v_writelane_b32 v40, s23, 19
+; GISEL-NEXT:    v_writelane_b32 v40, s24, 20
+; GISEL-NEXT:    v_writelane_b32 v40, s25, 21
+; GISEL-NEXT:    v_writelane_b32 v40, s26, 22
+; GISEL-NEXT:    v_writelane_b32 v40, s27, 23
+; GISEL-NEXT:    v_writelane_b32 v40, s28, 24
+; GISEL-NEXT:    v_writelane_b32 v40, s29, 25
+; GISEL-NEXT:    v_writelane_b32 v40, s30, 26
+; GISEL-NEXT:    v_writelane_b32 v40, s31, 27
+; GISEL-NEXT:    v_writelane_b32 v40, s72, 28
+; GISEL-NEXT:    v_writelane_b32 v40, s73, 29
+; GISEL-NEXT:    v_writelane_b32 v40, s74, 30
+; GISEL-NEXT:    v_writelane_b32 v40, s75, 31
+; GISEL-NEXT:    s_wait_alu 0xfffe
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; GISEL-NEXT:    v_readlane_b32 s95, v41, 11
+; GISEL-NEXT:    v_readlane_b32 s94, v41, 10
+; GISEL-NEXT:    v_readlane_b32 s93, v41, 9
+; GISEL-NEXT:    v_readlane_b32 s92, v41, 8
+; GISEL-NEXT:    v_readlane_b32 s91, v41, 7
+; GISEL-NEXT:    v_readlane_b32 s90, v41, 6
+; GISEL-NEXT:    v_readlane_b32 s89, v41, 5
+; GISEL-NEXT:    v_readlane_b32 s88, v41, 4
+; GISEL-NEXT:    v_readlane_b32 s79, v41, 3
+; GISEL-NEXT:    v_readlane_b32 s78, v41, 2
+; GISEL-NEXT:    v_readlane_b32 s77, v41, 1
+; GISEL-NEXT:    v_readlane_b32 s76, v41, 0
+; GISEL-NEXT:    v_readlane_b32 s75, v40, 31
+; GISEL-NEXT:    v_readlane_b32 s74, v40, 30
+; GISEL-NEXT:    v_readlane_b32 s73, v40, 29
+; GISEL-NEXT:    v_readlane_b32 s72, v40, 28
+; GISEL-NEXT:    v_readlane_b32 s31, v40, 27
+; GISEL-NEXT:    v_readlane_b32 s30, v40, 26
+; GISEL-NEXT:    v_readlane_b32 s29, v40, 25
+; GISEL-NEXT:    v_readlane_b32 s28, v40, 24
+; GISEL-NEXT:    v_readlane_b32 s27, v40, 23
+; GISEL-NEXT:    v_readlane_b32 s26, v40, 22
+; GISEL-NEXT:    v_readlane_b32 s25, v40, 21
+; GISEL-NEXT:    v_readlane_b32 s24, v40, 20
+; GISEL-NEXT:    v_readlane_b32 s23, v40, 19
+; GISEL-NEXT:    v_readlane_b32 s22, v40, 18
+; GISEL-NEXT:    v_readlane_b32 s21, v40, 17
+; GISEL-NEXT:    v_readlane_b32 s20, v40, 16
+; GISEL-NEXT:    v_readlane_b32 s19, v40, 15
+; GISEL-NEXT:    v_readlane_b32 s18, v40, 14
+; GISEL-NEXT:    v_readlane_b32 s17, v40, 13
+; GISEL-NEXT:    v_readlane_b32 s16, v40, 12
+; GISEL-NEXT:    v_readlane_b32 s15, v40, 11
+; GISEL-NEXT:    v_readlane_b32 s14, v40, 10
+; GISEL-NEXT:    v_readlane_b32 s13, v40, 9
+; GISEL-NEXT:    v_readlane_b32 s12, v40, 8
+; GISEL-NEXT:    v_readlane_b32 s11, v40, 7
+; GISEL-NEXT:    v_readlane_b32 s10, v40, 6
+; GISEL-NEXT:    v_readlane_b32 s9, v40, 5
+; GISEL-NEXT:    v_readlane_b32 s8, v40, 4
+; GISEL-NEXT:    v_readlane_b32 s7, v40, 3
+; GISEL-NEXT:    v_readlane_b32 s6, v40, 2
+; GISEL-NEXT:    v_readlane_b32 s5, v40, 1
+; GISEL-NEXT:    v_readlane_b32 s4, v40, 0
+; GISEL-NEXT:    s_clause 0x1
+; GISEL-NEXT:    scratch_load_b32 v40, off, s33
+; GISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:4
+; GISEL-NEXT:    s_mov_b32 s32, s33
+; GISEL-NEXT:    s_xor_b32 exec_lo, s34, -1
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:8
+; GISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:12
+; GISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:16
+; GISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:20
+; GISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:24
+; GISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:28
+; GISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:32
+; GISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:36
+; GISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:40
+; GISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:44
+; GISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:48
+; GISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:52
+; GISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:56
+; GISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:60
+; GISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:64
+; GISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:68
+; GISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:72
+; GISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:76
+; GISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:80
+; GISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:84
+; GISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:88
+; GISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:92
+; GISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:96
+; GISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:100
+; GISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:104
+; GISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:108
+; GISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:112
+; GISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:116
+; GISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:120
+; GISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:124
+; GISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:128
+; GISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:132
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:136
+; GISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:140
+; GISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:144
+; GISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:148
+; GISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:152
+; GISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:156
+; GISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:160
+; GISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:164
+; GISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:168
+; GISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:172
+; GISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:176
+; GISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:180
+; GISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:184
+; GISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:188
+; GISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:192
+; GISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:196
+; GISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:200
+; GISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:204
+; GISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:208
+; GISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:212
+; GISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:216
+; GISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:220
+; GISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:224
+; GISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:228
+; GISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:232
+; GISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:236
+; GISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:240
+; GISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:244
+; GISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:248
+; GISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:252
+; GISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:256
+; GISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:260
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:264
+; GISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:268
+; GISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:272
+; GISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:276
+; GISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:280
+; GISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:284
+; GISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:288
+; GISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:292
+; GISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:296
+; GISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:300
+; GISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:304
+; GISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:308
+; GISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:312
+; GISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:316
+; GISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:320
+; GISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:324
+; GISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:328
+; GISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:332
+; GISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:336
+; GISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:340
+; GISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:344
+; GISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:348
+; GISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:352
+; GISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:356
+; GISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:360
+; GISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:364
+; GISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:368
+; GISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:372
+; GISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:376
+; GISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:380
+; GISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:384
+; GISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:388
+; GISEL-NEXT:    s_clause 0x1f
+; GISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:392
+; GISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:396
+; GISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:400
+; GISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:404
+; GISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:408
+; GISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:412
+; GISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:416
+; GISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:420
+; GISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:424
+; GISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:428
+; GISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:432
+; GISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:436
+; GISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:440
+; GISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:444
+; GISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:448
+; GISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:452
+; GISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:456
+; GISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:460
+; GISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:464
+; GISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:468
+; GISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:472
+; GISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:476
+; GISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:480
+; GISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:484
+; GISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:488
+; GISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:492
+; GISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:496
+; GISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:500
+; GISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:504
+; GISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:508
+; GISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:512
+; GISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:516
+; GISEL-NEXT:    s_clause 0xf
+; GISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:520
+; GISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:524
+; GISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:528
+; GISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:532
+; GISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:536
+; GISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:540
+; GISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:544
+; GISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:548
+; GISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:552
+; GISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:556
+; GISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:560
+; GISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:564
+; GISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:568
+; GISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:572
+; GISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:576
+; GISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:580
+; GISEL-NEXT:    s_mov_b32 exec_lo, s34
+; GISEL-NEXT:    s_mov_b32 s33, s35
+; GISEL-NEXT:    s_wait_loadcnt 0x0
+; GISEL-NEXT:    s_wait_alu 0xfffe
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; DAGISEL64-LABEL: tail_call_gfx_from_whole_wave:
+; DAGISEL64:       ; %bb.0:
+; DAGISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
+; DAGISEL64-NEXT:    s_wait_expcnt 0x0
+; DAGISEL64-NEXT:    s_wait_samplecnt 0x0
+; DAGISEL64-NEXT:    s_wait_bvhcnt 0x0
+; DAGISEL64-NEXT:    s_wait_kmcnt 0x0
+; DAGISEL64-NEXT:    s_mov_b32 s36, s33
+; DAGISEL64-NEXT:    s_mov_b32 s33, s32
+; DAGISEL64-NEXT:    s_xor_saveexec_b64 s[34:35], -1
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_store_b32 off, v0, s33 offset:4
+; DAGISEL64-NEXT:    scratch_store_b32 off, v1, s33 offset:8
+; DAGISEL64-NEXT:    scratch_store_b32 off, v2, s33 offset:12
+; DAGISEL64-NEXT:    scratch_store_b32 off, v3, s33 offset:16
+; DAGISEL64-NEXT:    scratch_store_b32 off, v4, s33 offset:20
+; DAGISEL64-NEXT:    scratch_store_b32 off, v5, s33 offset:24
+; DAGISEL64-NEXT:    scratch_store_b32 off, v6, s33 offset:28
+; DAGISEL64-NEXT:    scratch_store_b32 off, v7, s33 offset:32
+; DAGISEL64-NEXT:    scratch_store_b32 off, v8, s33 offset:36
+; DAGISEL64-NEXT:    scratch_store_b32 off, v9, s33 offset:40
+; DAGISEL64-NEXT:    scratch_store_b32 off, v10, s33 offset:44
+; DAGISEL64-NEXT:    scratch_store_b32 off, v11, s33 offset:48
+; DAGISEL64-NEXT:    scratch_store_b32 off, v12, s33 offset:52
+; DAGISEL64-NEXT:    scratch_store_b32 off, v13, s33 offset:56
+; DAGISEL64-NEXT:    scratch_store_b32 off, v14, s33 offset:60
+; DAGISEL64-NEXT:    scratch_store_b32 off, v15, s33 offset:64
+; DAGISEL64-NEXT:    scratch_store_b32 off, v16, s33 offset:68
+; DAGISEL64-NEXT:    scratch_store_b32 off, v17, s33 offset:72
+; DAGISEL64-NEXT:    scratch_store_b32 off, v18, s33 offset:76
+; DAGISEL64-NEXT:    scratch_store_b32 off, v19, s33 offset:80
+; DAGISEL64-NEXT:    scratch_store_b32 off, v20, s33 offset:84
+; DAGISEL64-NEXT:    scratch_store_b32 off, v21, s33 offset:88
+; DAGISEL64-NEXT:    scratch_store_b32 off, v22, s33 offset:92
+; DAGISEL64-NEXT:    scratch_store_b32 off, v23, s33 offset:96
+; DAGISEL64-NEXT:    scratch_store_b32 off, v24, s33 offset:100
+; DAGISEL64-NEXT:    scratch_store_b32 off, v25, s33 offset:104
+; DAGISEL64-NEXT:    scratch_store_b32 off, v26, s33 offset:108
+; DAGISEL64-NEXT:    scratch_store_b32 off, v27, s33 offset:112
+; DAGISEL64-NEXT:    scratch_store_b32 off, v28, s33 offset:116
+; DAGISEL64-NEXT:    scratch_store_b32 off, v29, s33 offset:120
+; DAGISEL64-NEXT:    scratch_store_b32 off, v30, s33 offset:124
+; DAGISEL64-NEXT:    scratch_store_b32 off, v31, s33 offset:128
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_store_b32 off, v32, s33 offset:132
+; DAGISEL64-NEXT:    scratch_store_b32 off, v33, s33 offset:136
+; DAGISEL64-NEXT:    scratch_store_b32 off, v34, s33 offset:140
+; DAGISEL64-NEXT:    scratch_store_b32 off, v35, s33 offset:144
+; DAGISEL64-NEXT:    scratch_store_b32 off, v36, s33 offset:148
+; DAGISEL64-NEXT:    scratch_store_b32 off, v37, s33 offset:152
+; DAGISEL64-NEXT:    scratch_store_b32 off, v38, s33 offset:156
+; DAGISEL64-NEXT:    scratch_store_b32 off, v39, s33 offset:160
+; DAGISEL64-NEXT:    scratch_store_b32 off, v48, s33 offset:164
+; DAGISEL64-NEXT:    scratch_store_b32 off, v49, s33 offset:168
+; DAGISEL64-NEXT:    scratch_store_b32 off, v50, s33 offset:172
+; DAGISEL64-NEXT:    scratch_store_b32 off, v51, s33 offset:176
+; DAGISEL64-NEXT:    scratch_store_b32 off, v52, s33 offset:180
+; DAGISEL64-NEXT:    scratch_store_b32 off, v53, s33 offset:184
+; DAGISEL64-NEXT:    scratch_store_b32 off, v54, s33 offset:188
+; DAGISEL64-NEXT:    scratch_store_b32 off, v55, s33 offset:192
+; DAGISEL64-NEXT:    scratch_store_b32 off, v64, s33 offset:196
+; DAGISEL64-NEXT:    scratch_store_b32 off, v65, s33 offset:200
+; DAGISEL64-NEXT:    scratch_store_b32 off, v66, s33 offset:204
+; DAGISEL64-NEXT:    scratch_store_b32 off, v67, s33 offset:208
+; DAGISEL64-NEXT:    scratch_store_b32 off, v68, s33 offset:212
+; DAGISEL64-NEXT:    scratch_store_b32 off, v69, s33 offset:216
+; DAGISEL64-NEXT:    scratch_store_b32 off, v70, s33 offset:220
+; DAGISEL64-NEXT:    scratch_store_b32 off, v71, s33 offset:224
+; DAGISEL64-NEXT:    scratch_store_b32 off, v80, s33 offset:228
+; DAGISEL64-NEXT:    scratch_store_b32 off, v81, s33 offset:232
+; DAGISEL64-NEXT:    scratch_store_b32 off, v82, s33 offset:236
+; DAGISEL64-NEXT:    scratch_store_b32 off, v83, s33 offset:240
+; DAGISEL64-NEXT:    scratch_store_b32 off, v84, s33 offset:244
+; DAGISEL64-NEXT:    scratch_store_b32 off, v85, s33 offset:248
+; DAGISEL64-NEXT:    scratch_store_b32 off, v86, s33 offset:252
+; DAGISEL64-NEXT:    scratch_store_b32 off, v87, s33 offset:256
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_store_b32 off, v96, s33 offset:260
+; DAGISEL64-NEXT:    scratch_store_b32 off, v97, s33 offset:264
+; DAGISEL64-NEXT:    scratch_store_b32 off, v98, s33 offset:268
+; DAGISEL64-NEXT:    scratch_store_b32 off, v99, s33 offset:272
+; DAGISEL64-NEXT:    scratch_store_b32 off, v100, s33 offset:276
+; DAGISEL64-NEXT:    scratch_store_b32 off, v101, s33 offset:280
+; DAGISEL64-NEXT:    scratch_store_b32 off, v102, s33 offset:284
+; DAGISEL64-NEXT:    scratch_store_b32 off, v103, s33 offset:288
+; DAGISEL64-NEXT:    scratch_store_b32 off, v112, s33 offset:292
+; DAGISEL64-NEXT:    scratch_store_b32 off, v113, s33 offset:296
+; DAGISEL64-NEXT:    scratch_store_b32 off, v114, s33 offset:300
+; DAGISEL64-NEXT:    scratch_store_b32 off, v115, s33 offset:304
+; DAGISEL64-NEXT:    scratch_store_b32 off, v116, s33 offset:308
+; DAGISEL64-NEXT:    scratch_store_b32 off, v117, s33 offset:312
+; DAGISEL64-NEXT:    scratch_store_b32 off, v118, s33 offset:316
+; DAGISEL64-NEXT:    scratch_store_b32 off, v119, s33 offset:320
+; DAGISEL64-NEXT:    scratch_store_b32 off, v128, s33 offset:324
+; DAGISEL64-NEXT:    scratch_store_b32 off, v129, s33 offset:328
+; DAGISEL64-NEXT:    scratch_store_b32 off, v130, s33 offset:332
+; DAGISEL64-NEXT:    scratch_store_b32 off, v131, s33 offset:336
+; DAGISEL64-NEXT:    scratch_store_b32 off, v132, s33 offset:340
+; DAGISEL64-NEXT:    scratch_store_b32 off, v133, s33 offset:344
+; DAGISEL64-NEXT:    scratch_store_b32 off, v134, s33 offset:348
+; DAGISEL64-NEXT:    scratch_store_b32 off, v135, s33 offset:352
+; DAGISEL64-NEXT:    scratch_store_b32 off, v144, s33 offset:356
+; DAGISEL64-NEXT:    scratch_store_b32 off, v145, s33 offset:360
+; DAGISEL64-NEXT:    scratch_store_b32 off, v146, s33 offset:364
+; DAGISEL64-NEXT:    scratch_store_b32 off, v147, s33 offset:368
+; DAGISEL64-NEXT:    scratch_store_b32 off, v148, s33 offset:372
+; DAGISEL64-NEXT:    scratch_store_b32 off, v149, s33 offset:376
+; DAGISEL64-NEXT:    scratch_store_b32 off, v150, s33 offset:380
+; DAGISEL64-NEXT:    scratch_store_b32 off, v151, s33 offset:384
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_store_b32 off, v160, s33 offset:388
+; DAGISEL64-NEXT:    scratch_store_b32 off, v161, s33 offset:392
+; DAGISEL64-NEXT:    scratch_store_b32 off, v162, s33 offset:396
+; DAGISEL64-NEXT:    scratch_store_b32 off, v163, s33 offset:400
+; DAGISEL64-NEXT:    scratch_store_b32 off, v164, s33 offset:404
+; DAGISEL64-NEXT:    scratch_store_b32 off, v165, s33 offset:408
+; DAGISEL64-NEXT:    scratch_store_b32 off, v166, s33 offset:412
+; DAGISEL64-NEXT:    scratch_store_b32 off, v167, s33 offset:416
+; DAGISEL64-NEXT:    scratch_store_b32 off, v176, s33 offset:420
+; DAGISEL64-NEXT:    scratch_store_b32 off, v177, s33 offset:424
+; DAGISEL64-NEXT:    scratch_store_b32 off, v178, s33 offset:428
+; DAGISEL64-NEXT:    scratch_store_b32 off, v179, s33 offset:432
+; DAGISEL64-NEXT:    scratch_store_b32 off, v180, s33 offset:436
+; DAGISEL64-NEXT:    scratch_store_b32 off, v181, s33 offset:440
+; DAGISEL64-NEXT:    scratch_store_b32 off, v182, s33 offset:444
+; DAGISEL64-NEXT:    scratch_store_b32 off, v183, s33 offset:448
+; DAGISEL64-NEXT:    scratch_store_b32 off, v192, s33 offset:452
+; DAGISEL64-NEXT:    scratch_store_b32 off, v193, s33 offset:456
+; DAGISEL64-NEXT:    scratch_store_b32 off, v194, s33 offset:460
+; DAGISEL64-NEXT:    scratch_store_b32 off, v195, s33 offset:464
+; DAGISEL64-NEXT:    scratch_store_b32 off, v196, s33 offset:468
+; DAGISEL64-NEXT:    scratch_store_b32 off, v197, s33 offset:472
+; DAGISEL64-NEXT:    scratch_store_b32 off, v198, s33 offset:476
+; DAGISEL64-NEXT:    scratch_store_b32 off, v199, s33 offset:480
+; DAGISEL64-NEXT:    scratch_store_b32 off, v208, s33 offset:484
+; DAGISEL64-NEXT:    scratch_store_b32 off, v209, s33 offset:488
+; DAGISEL64-NEXT:    scratch_store_b32 off, v210, s33 offset:492
+; DAGISEL64-NEXT:    scratch_store_b32 off, v211, s33 offset:496
+; DAGISEL64-NEXT:    scratch_store_b32 off, v212, s33 offset:500
+; DAGISEL64-NEXT:    scratch_store_b32 off, v213, s33 offset:504
+; DAGISEL64-NEXT:    scratch_store_b32 off, v214, s33 offset:508
+; DAGISEL64-NEXT:    scratch_store_b32 off, v215, s33 offset:512
+; DAGISEL64-NEXT:    s_clause 0xf
+; DAGISEL64-NEXT:    scratch_store_b32 off, v224, s33 offset:516
+; DAGISEL64-NEXT:    scratch_store_b32 off, v225, s33 offset:520
+; DAGISEL64-NEXT:    scratch_store_b32 off, v226, s33 offset:524
+; DAGISEL64-NEXT:    scratch_store_b32 off, v227, s33 offset:528
+; DAGISEL64-NEXT:    scratch_store_b32 off, v228, s33 offset:532
+; DAGISEL64-NEXT:    scratch_store_b32 off, v229, s33 offset:536
+; DAGISEL64-NEXT:    scratch_store_b32 off, v230, s33 offset:540
+; DAGISEL64-NEXT:    scratch_store_b32 off, v231, s33 offset:544
+; DAGISEL64-NEXT:    scratch_store_b32 off, v240, s33 offset:548
+; DAGISEL64-NEXT:    scratch_store_b32 off, v241, s33 offset:552
+; DAGISEL64-NEXT:    scratch_store_b32 off, v242, s33 offset:556
+; DAGISEL64-NEXT:    scratch_store_b32 off, v243, s33 offset:560
+; DAGISEL64-NEXT:    scratch_store_b32 off, v244, s33 offset:564
+; DAGISEL64-NEXT:    scratch_store_b32 off, v245, s33 offset:568
+; DAGISEL64-NEXT:    scratch_store_b32 off, v246, s33 offset:572
+; DAGISEL64-NEXT:    scratch_store_b32 off, v247, s33 offset:576
+; DAGISEL64-NEXT:    s_mov_b64 exec, -1
+; DAGISEL64-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s4, 0
+; DAGISEL64-NEXT:    v_mov_b32_e32 v2, v0
+; DAGISEL64-NEXT:    v_swap_b32 v0, v1
+; DAGISEL64-NEXT:    s_mov_b32 s1, gfx_callee@abs32@hi
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s5, 1
+; DAGISEL64-NEXT:    s_mov_b32 s0, gfx_callee@abs32@lo
+; DAGISEL64-NEXT:    s_addk_co_i32 s32, 0x250
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s6, 2
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s7, 3
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s8, 4
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s9, 5
+; DAGISEL64-NEXT:    s_mov_b64 s[8:9], 0
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s10, 6
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s11, 7
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s12, 8
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s13, 9
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s14, 10
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s15, 11
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s16, 12
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s17, 13
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s18, 14
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s19, 15
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s20, 16
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s21, 17
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s22, 18
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s23, 19
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s24, 20
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s25, 21
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s26, 22
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s27, 23
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s28, 24
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s29, 25
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s30, 26
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s31, 27
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s72, 28
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s73, 29
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s74, 30
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s75, 31
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s76, 32
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s77, 33
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s78, 34
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s79, 35
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s88, 36
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s89, 37
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s90, 38
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s91, 39
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s92, 40
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s93, 41
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s94, 42
+; DAGISEL64-NEXT:    v_writelane_b32 v40, s95, 43
+; DAGISEL64-NEXT:    s_wait_alu 0xfffe
+; DAGISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; DAGISEL64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; DAGISEL64-NEXT:    v_readlane_b32 s95, v40, 43
+; DAGISEL64-NEXT:    v_readlane_b32 s94, v40, 42
+; DAGISEL64-NEXT:    v_readlane_b32 s93, v40, 41
+; DAGISEL64-NEXT:    v_readlane_b32 s92, v40, 40
+; DAGISEL64-NEXT:    v_readlane_b32 s91, v40, 39
+; DAGISEL64-NEXT:    v_readlane_b32 s90, v40, 38
+; DAGISEL64-NEXT:    v_readlane_b32 s89, v40, 37
+; DAGISEL64-NEXT:    v_readlane_b32 s88, v40, 36
+; DAGISEL64-NEXT:    v_readlane_b32 s79, v40, 35
+; DAGISEL64-NEXT:    v_readlane_b32 s78, v40, 34
+; DAGISEL64-NEXT:    v_readlane_b32 s77, v40, 33
+; DAGISEL64-NEXT:    v_readlane_b32 s76, v40, 32
+; DAGISEL64-NEXT:    v_readlane_b32 s75, v40, 31
+; DAGISEL64-NEXT:    v_readlane_b32 s74, v40, 30
+; DAGISEL64-NEXT:    v_readlane_b32 s73, v40, 29
+; DAGISEL64-NEXT:    v_readlane_b32 s72, v40, 28
+; DAGISEL64-NEXT:    v_readlane_b32 s31, v40, 27
+; DAGISEL64-NEXT:    v_readlane_b32 s30, v40, 26
+; DAGISEL64-NEXT:    v_readlane_b32 s29, v40, 25
+; DAGISEL64-NEXT:    v_readlane_b32 s28, v40, 24
+; DAGISEL64-NEXT:    v_readlane_b32 s27, v40, 23
+; DAGISEL64-NEXT:    v_readlane_b32 s26, v40, 22
+; DAGISEL64-NEXT:    v_readlane_b32 s25, v40, 21
+; DAGISEL64-NEXT:    v_readlane_b32 s24, v40, 20
+; DAGISEL64-NEXT:    v_readlane_b32 s23, v40, 19
+; DAGISEL64-NEXT:    v_readlane_b32 s22, v40, 18
+; DAGISEL64-NEXT:    v_readlane_b32 s21, v40, 17
+; DAGISEL64-NEXT:    v_readlane_b32 s20, v40, 16
+; DAGISEL64-NEXT:    v_readlane_b32 s19, v40, 15
+; DAGISEL64-NEXT:    v_readlane_b32 s18, v40, 14
+; DAGISEL64-NEXT:    v_readlane_b32 s17, v40, 13
+; DAGISEL64-NEXT:    v_readlane_b32 s16, v40, 12
+; DAGISEL64-NEXT:    v_readlane_b32 s15, v40, 11
+; DAGISEL64-NEXT:    v_readlane_b32 s14, v40, 10
+; DAGISEL64-NEXT:    v_readlane_b32 s13, v40, 9
+; DAGISEL64-NEXT:    v_readlane_b32 s12, v40, 8
+; DAGISEL64-NEXT:    v_readlane_b32 s11, v40, 7
+; DAGISEL64-NEXT:    v_readlane_b32 s10, v40, 6
+; DAGISEL64-NEXT:    v_readlane_b32 s9, v40, 5
+; DAGISEL64-NEXT:    v_readlane_b32 s8, v40, 4
+; DAGISEL64-NEXT:    v_readlane_b32 s7, v40, 3
+; DAGISEL64-NEXT:    v_readlane_b32 s6, v40, 2
+; DAGISEL64-NEXT:    v_readlane_b32 s5, v40, 1
+; DAGISEL64-NEXT:    v_readlane_b32 s4, v40, 0
+; DAGISEL64-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; DAGISEL64-NEXT:    s_mov_b32 s32, s33
+; DAGISEL64-NEXT:    s_xor_b64 exec, s[34:35], -1
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_load_b32 v0, off, s33 offset:4
+; DAGISEL64-NEXT:    scratch_load_b32 v1, off, s33 offset:8
+; DAGISEL64-NEXT:    scratch_load_b32 v2, off, s33 offset:12
+; DAGISEL64-NEXT:    scratch_load_b32 v3, off, s33 offset:16
+; DAGISEL64-NEXT:    scratch_load_b32 v4, off, s33 offset:20
+; DAGISEL64-NEXT:    scratch_load_b32 v5, off, s33 offset:24
+; DAGISEL64-NEXT:    scratch_load_b32 v6, off, s33 offset:28
+; DAGISEL64-NEXT:    scratch_load_b32 v7, off, s33 offset:32
+; DAGISEL64-NEXT:    scratch_load_b32 v8, off, s33 offset:36
+; DAGISEL64-NEXT:    scratch_load_b32 v9, off, s33 offset:40
+; DAGISEL64-NEXT:    scratch_load_b32 v10, off, s33 offset:44
+; DAGISEL64-NEXT:    scratch_load_b32 v11, off, s33 offset:48
+; DAGISEL64-NEXT:    scratch_load_b32 v12, off, s33 offset:52
+; DAGISEL64-NEXT:    scratch_load_b32 v13, off, s33 offset:56
+; DAGISEL64-NEXT:    scratch_load_b32 v14, off, s33 offset:60
+; DAGISEL64-NEXT:    scratch_load_b32 v15, off, s33 offset:64
+; DAGISEL64-NEXT:    scratch_load_b32 v16, off, s33 offset:68
+; DAGISEL64-NEXT:    scratch_load_b32 v17, off, s33 offset:72
+; DAGISEL64-NEXT:    scratch_load_b32 v18, off, s33 offset:76
+; DAGISEL64-NEXT:    scratch_load_b32 v19, off, s33 offset:80
+; DAGISEL64-NEXT:    scratch_load_b32 v20, off, s33 offset:84
+; DAGISEL64-NEXT:    scratch_load_b32 v21, off, s33 offset:88
+; DAGISEL64-NEXT:    scratch_load_b32 v22, off, s33 offset:92
+; DAGISEL64-NEXT:    scratch_load_b32 v23, off, s33 offset:96
+; DAGISEL64-NEXT:    scratch_load_b32 v24, off, s33 offset:100
+; DAGISEL64-NEXT:    scratch_load_b32 v25, off, s33 offset:104
+; DAGISEL64-NEXT:    scratch_load_b32 v26, off, s33 offset:108
+; DAGISEL64-NEXT:    scratch_load_b32 v27, off, s33 offset:112
+; DAGISEL64-NEXT:    scratch_load_b32 v28, off, s33 offset:116
+; DAGISEL64-NEXT:    scratch_load_b32 v29, off, s33 offset:120
+; DAGISEL64-NEXT:    scratch_load_b32 v30, off, s33 offset:124
+; DAGISEL64-NEXT:    scratch_load_b32 v31, off, s33 offset:128
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_load_b32 v32, off, s33 offset:132
+; DAGISEL64-NEXT:    scratch_load_b32 v33, off, s33 offset:136
+; DAGISEL64-NEXT:    scratch_load_b32 v34, off, s33 offset:140
+; DAGISEL64-NEXT:    scratch_load_b32 v35, off, s33 offset:144
+; DAGISEL64-NEXT:    scratch_load_b32 v36, off, s33 offset:148
+; DAGISEL64-NEXT:    scratch_load_b32 v37, off, s33 offset:152
+; DAGISEL64-NEXT:    scratch_load_b32 v38, off, s33 offset:156
+; DAGISEL64-NEXT:    scratch_load_b32 v39, off, s33 offset:160
+; DAGISEL64-NEXT:    scratch_load_b32 v48, off, s33 offset:164
+; DAGISEL64-NEXT:    scratch_load_b32 v49, off, s33 offset:168
+; DAGISEL64-NEXT:    scratch_load_b32 v50, off, s33 offset:172
+; DAGISEL64-NEXT:    scratch_load_b32 v51, off, s33 offset:176
+; DAGISEL64-NEXT:    scratch_load_b32 v52, off, s33 offset:180
+; DAGISEL64-NEXT:    scratch_load_b32 v53, off, s33 offset:184
+; DAGISEL64-NEXT:    scratch_load_b32 v54, off, s33 offset:188
+; DAGISEL64-NEXT:    scratch_load_b32 v55, off, s33 offset:192
+; DAGISEL64-NEXT:    scratch_load_b32 v64, off, s33 offset:196
+; DAGISEL64-NEXT:    scratch_load_b32 v65, off, s33 offset:200
+; DAGISEL64-NEXT:    scratch_load_b32 v66, off, s33 offset:204
+; DAGISEL64-NEXT:    scratch_load_b32 v67, off, s33 offset:208
+; DAGISEL64-NEXT:    scratch_load_b32 v68, off, s33 offset:212
+; DAGISEL64-NEXT:    scratch_load_b32 v69, off, s33 offset:216
+; DAGISEL64-NEXT:    scratch_load_b32 v70, off, s33 offset:220
+; DAGISEL64-NEXT:    scratch_load_b32 v71, off, s33 offset:224
+; DAGISEL64-NEXT:    scratch_load_b32 v80, off, s33 offset:228
+; DAGISEL64-NEXT:    scratch_load_b32 v81, off, s33 offset:232
+; DAGISEL64-NEXT:    scratch_load_b32 v82, off, s33 offset:236
+; DAGISEL64-NEXT:    scratch_load_b32 v83, off, s33 offset:240
+; DAGISEL64-NEXT:    scratch_load_b32 v84, off, s33 offset:244
+; DAGISEL64-NEXT:    scratch_load_b32 v85, off, s33 offset:248
+; DAGISEL64-NEXT:    scratch_load_b32 v86, off, s33 offset:252
+; DAGISEL64-NEXT:    scratch_load_b32 v87, off, s33 offset:256
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_load_b32 v96, off, s33 offset:260
+; DAGISEL64-NEXT:    scratch_load_b32 v97, off, s33 offset:264
+; DAGISEL64-NEXT:    scratch_load_b32 v98, off, s33 offset:268
+; DAGISEL64-NEXT:    scratch_load_b32 v99, off, s33 offset:272
+; DAGISEL64-NEXT:    scratch_load_b32 v100, off, s33 offset:276
+; DAGISEL64-NEXT:    scratch_load_b32 v101, off, s33 offset:280
+; DAGISEL64-NEXT:    scratch_load_b32 v102, off, s33 offset:284
+; DAGISEL64-NEXT:    scratch_load_b32 v103, off, s33 offset:288
+; DAGISEL64-NEXT:    scratch_load_b32 v112, off, s33 offset:292
+; DAGISEL64-NEXT:    scratch_load_b32 v113, off, s33 offset:296
+; DAGISEL64-NEXT:    scratch_load_b32 v114, off, s33 offset:300
+; DAGISEL64-NEXT:    scratch_load_b32 v115, off, s33 offset:304
+; DAGISEL64-NEXT:    scratch_load_b32 v116, off, s33 offset:308
+; DAGISEL64-NEXT:    scratch_load_b32 v117, off, s33 offset:312
+; DAGISEL64-NEXT:    scratch_load_b32 v118, off, s33 offset:316
+; DAGISEL64-NEXT:    scratch_load_b32 v119, off, s33 offset:320
+; DAGISEL64-NEXT:    scratch_load_b32 v128, off, s33 offset:324
+; DAGISEL64-NEXT:    scratch_load_b32 v129, off, s33 offset:328
+; DAGISEL64-NEXT:    scratch_load_b32 v130, off, s33 offset:332
+; DAGISEL64-NEXT:    scratch_load_b32 v131, off, s33 offset:336
+; DAGISEL64-NEXT:    scratch_load_b32 v132, off, s33 offset:340
+; DAGISEL64-NEXT:    scratch_load_b32 v133, off, s33 offset:344
+; DAGISEL64-NEXT:    scratch_load_b32 v134, off, s33 offset:348
+; DAGISEL64-NEXT:    scratch_load_b32 v135, off, s33 offset:352
+; DAGISEL64-NEXT:    scratch_load_b32 v144, off, s33 offset:356
+; DAGISEL64-NEXT:    scratch_load_b32 v145, off, s33 offset:360
+; DAGISEL64-NEXT:    scratch_load_b32 v146, off, s33 offset:364
+; DAGISEL64-NEXT:    scratch_load_b32 v147, off, s33 offset:368
+; DAGISEL64-NEXT:    scratch_load_b32 v148, off, s33 offset:372
+; DAGISEL64-NEXT:    scratch_load_b32 v149, off, s33 offset:376
+; DAGISEL64-NEXT:    scratch_load_b32 v150, off, s33 offset:380
+; DAGISEL64-NEXT:    scratch_load_b32 v151, off, s33 offset:384
+; DAGISEL64-NEXT:    s_clause 0x1f
+; DAGISEL64-NEXT:    scratch_load_b32 v160, off, s33 offset:388
+; DAGISEL64-NEXT:    scratch_load_b32 v161, off, s33 offset:392
+; DAGISEL64-NEXT:    scratch_load_b32 v162, off, s33 offset:396
+; DAGISEL64-NEXT:    scratch_load_b32 v163, off, s33 offset:400
+; DAGISEL64-NEXT:    scratch_load_b32 v164, off, s33 offset:404
+; DAGISEL64-NEXT:    scratch_load_b32 v165, off, s33 offset:408
+; DAGISEL64-NEXT:    scratch_load_b32 v166, off, s33 offset:412
+; DAGISEL64-NEXT:    scratch_load_b32 v167, off, s33 offset:416
+; DAGISEL64-NEXT:    scratch_load_b32 v176, off, s33 offset:420
+; DAGISEL64-NEXT:    scratch_load_b32 v177, off, s33 offset:424
+; DAGISEL64-NEXT:    scratch_load_b32 v178, off, s33 offset:428
+; DAGISEL64-NEXT:    scratch_load_b32 v179, off, s33 offset:432
+; DAGISEL64-NEXT:    scratch_load_b32 v180, off, s33 offset:436
+; DAGISEL64-NEXT:    scratch_load_b32 v181, off, s33 offset:440
+; DAGISEL64-NEXT:    scratch_load_b32 v182, off, s33 offset:444
+; DAGISEL64-NEXT:    scratch_load_b32 v183, off, s33 offset:448
+; DAGISEL64-NEXT:    scratch_load_b32 v192, off, s33 offset:452
+; DAGISEL64-NEXT:    scratch_load_b32 v193, off, s33 offset:456
+; DAGISEL64-NEXT:    scratch_load_b32 v194, off, s33 offset:460
+; DAGISEL64-NEXT:    scratch_load_b32 v195, off, s33 offset:464
+; DAGISEL64-NEXT:    scratch_load_b32 v196, off, s33 offset:468
+; DAGISEL64-NEXT:    scratch_load_b32 v197, off, s33 offset:472
+; DAGISEL64-NEXT:    scratch_load_b32 v198, off, s33 offset:476
+; DAGISEL64-NEXT:    scratch_load_b32 v199, off, s33 offset:480
+; DAGISEL64-NEXT:    scratch_load_b32 v208, off, s33 offset:484
+; DAGISEL64-NEXT:    scratch_load_b32 v209, off, s33 offset:488
+; DAGISEL64-NEXT:    scratch_load_b32 v210, off, s33 offset:492
+; DAGISEL64-NEXT:    scratch_load_b32 v211, off, s33 offset:496
+; DAGISEL64-NEXT:    scratch_load_b32 v212, off, s33 offset:500
+; DAGISEL64-NEXT:    scratch_load_b32 v213, off, s33 offset:504
+; DAGISEL64-NEXT:    scratch_load_b32 v214, off, s33 offset:508
+; DAGISEL64-NEXT:    scratch_load_b32 v215, off, s33 offset:512
+; DAGISEL64-NEXT:    s_clause 0xf
+; DAGISEL64-NEXT:    scratch_load_b32 v224, off, s33 offset:516
+; DAGISEL64-NEXT:    scratch_load_b32 v225, off, s33 offset:520
+; DAGISEL64-NEXT:    scratch_load_b32 v226, off, s33 offset:524
+; DAGISEL64-NEXT:    scratch_load_b32 v227, off, s33 offset:528
+; DAGISEL64-NEXT:    scratch_load_b32 v228, off, s33 offset:532
+; DAGISEL64-NEXT:    scratch_load_b32 v229, off, s33 offset:536
+; DAGISEL64-NEXT:    scratch_load_b32 v230, off, s33 offset:540
+; DAGISEL64-NEXT:    scratch_load_b32 v231, off, s33 offset:544
+; DAGISEL64-NEXT:    scratch_load_b32 v240, off, s33 offset:548
+; DAGISEL64-NEXT:    scratch_load_b32 v241, off, s33 offset:552
+; DAGISEL64-NEXT:    scratch_load_b32 v242, off, s33 offset:556
+; DAGISEL64-NEXT:    scratch_load_b32 v243, off, s33 offset:560
+; DAGISEL64-NEXT:    scratch_load_b32 v244, off, s33 offset:564
+; DAGISEL64-NEXT:    scratch_load_b32 v245, off, s33 offset:568
+; DAGISEL64-NEXT:    scratch_load_b32 v246, off, s33 offset:572
+; DAGISEL64-NEXT:    scratch_load_b32 v247, off, s33 offset:576
+; DAGISEL64-NEXT:    s_mov_b64 exec, s[34:35]
+; DAGISEL64-NEXT:    s_mov_b32 s33, s36
+; DAGISEL64-NEXT:    s_wait_loadcnt 0x0
+; DAGISEL64-NEXT:    s_wait_alu 0xfffe
+; DAGISEL64-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL64-LABEL: tail_call_gfx_from_whole_wave:
+; GISEL64:       ; %bb.0:
+; GISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL64-NEXT:    s_wait_expcnt 0x0
+; GISEL64-NEXT:    s_wait_samplecnt 0x0
+; GISEL64-NEXT:    s_wait_bvhcnt 0x0
+; GISEL64-NEXT:    s_wait_kmcnt 0x0
+; GISEL64-NEXT:    s_mov_b32 s36, s33
+; GISEL64-NEXT:    s_mov_b32 s33, s32
+; GISEL64-NEXT:    s_xor_saveexec_b64 s[34:35], -1
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_store_b32 off, v0, s33 offset:4
+; GISEL64-NEXT:    scratch_store_b32 off, v1, s33 offset:8
+; GISEL64-NEXT:    scratch_store_b32 off, v2, s33 offset:12
+; GISEL64-NEXT:    scratch_store_b32 off, v3, s33 offset:16
+; GISEL64-NEXT:    scratch_store_b32 off, v4, s33 offset:20
+; GISEL64-NEXT:    scratch_store_b32 off, v5, s33 offset:24
+; GISEL64-NEXT:    scratch_store_b32 off, v6, s33 offset:28
+; GISEL64-NEXT:    scratch_store_b32 off, v7, s33 offset:32
+; GISEL64-NEXT:    scratch_store_b32 off, v8, s33 offset:36
+; GISEL64-NEXT:    scratch_store_b32 off, v9, s33 offset:40
+; GISEL64-NEXT:    scratch_store_b32 off, v10, s33 offset:44
+; GISEL64-NEXT:    scratch_store_b32 off, v11, s33 offset:48
+; GISEL64-NEXT:    scratch_store_b32 off, v12, s33 offset:52
+; GISEL64-NEXT:    scratch_store_b32 off, v13, s33 offset:56
+; GISEL64-NEXT:    scratch_store_b32 off, v14, s33 offset:60
+; GISEL64-NEXT:    scratch_store_b32 off, v15, s33 offset:64
+; GISEL64-NEXT:    scratch_store_b32 off, v16, s33 offset:68
+; GISEL64-NEXT:    scratch_store_b32 off, v17, s33 offset:72
+; GISEL64-NEXT:    scratch_store_b32 off, v18, s33 offset:76
+; GISEL64-NEXT:    scratch_store_b32 off, v19, s33 offset:80
+; GISEL64-NEXT:    scratch_store_b32 off, v20, s33 offset:84
+; GISEL64-NEXT:    scratch_store_b32 off, v21, s33 offset:88
+; GISEL64-NEXT:    scratch_store_b32 off, v22, s33 offset:92
+; GISEL64-NEXT:    scratch_store_b32 off, v23, s33 offset:96
+; GISEL64-NEXT:    scratch_store_b32 off, v24, s33 offset:100
+; GISEL64-NEXT:    scratch_store_b32 off, v25, s33 offset:104
+; GISEL64-NEXT:    scratch_store_b32 off, v26, s33 offset:108
+; GISEL64-NEXT:    scratch_store_b32 off, v27, s33 offset:112
+; GISEL64-NEXT:    scratch_store_b32 off, v28, s33 offset:116
+; GISEL64-NEXT:    scratch_store_b32 off, v29, s33 offset:120
+; GISEL64-NEXT:    scratch_store_b32 off, v30, s33 offset:124
+; GISEL64-NEXT:    scratch_store_b32 off, v31, s33 offset:128
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_store_b32 off, v32, s33 offset:132
+; GISEL64-NEXT:    scratch_store_b32 off, v33, s33 offset:136
+; GISEL64-NEXT:    scratch_store_b32 off, v34, s33 offset:140
+; GISEL64-NEXT:    scratch_store_b32 off, v35, s33 offset:144
+; GISEL64-NEXT:    scratch_store_b32 off, v36, s33 offset:148
+; GISEL64-NEXT:    scratch_store_b32 off, v37, s33 offset:152
+; GISEL64-NEXT:    scratch_store_b32 off, v38, s33 offset:156
+; GISEL64-NEXT:    scratch_store_b32 off, v39, s33 offset:160
+; GISEL64-NEXT:    scratch_store_b32 off, v48, s33 offset:164
+; GISEL64-NEXT:    scratch_store_b32 off, v49, s33 offset:168
+; GISEL64-NEXT:    scratch_store_b32 off, v50, s33 offset:172
+; GISEL64-NEXT:    scratch_store_b32 off, v51, s33 offset:176
+; GISEL64-NEXT:    scratch_store_b32 off, v52, s33 offset:180
+; GISEL64-NEXT:    scratch_store_b32 off, v53, s33 offset:184
+; GISEL64-NEXT:    scratch_store_b32 off, v54, s33 offset:188
+; GISEL64-NEXT:    scratch_store_b32 off, v55, s33 offset:192
+; GISEL64-NEXT:    scratch_store_b32 off, v64, s33 offset:196
+; GISEL64-NEXT:    scratch_store_b32 off, v65, s33 offset:200
+; GISEL64-NEXT:    scratch_store_b32 off, v66, s33 offset:204
+; GISEL64-NEXT:    scratch_store_b32 off, v67, s33 offset:208
+; GISEL64-NEXT:    scratch_store_b32 off, v68, s33 offset:212
+; GISEL64-NEXT:    scratch_store_b32 off, v69, s33 offset:216
+; GISEL64-NEXT:    scratch_store_b32 off, v70, s33 offset:220
+; GISEL64-NEXT:    scratch_store_b32 off, v71, s33 offset:224
+; GISEL64-NEXT:    scratch_store_b32 off, v80, s33 offset:228
+; GISEL64-NEXT:    scratch_store_b32 off, v81, s33 offset:232
+; GISEL64-NEXT:    scratch_store_b32 off, v82, s33 offset:236
+; GISEL64-NEXT:    scratch_store_b32 off, v83, s33 offset:240
+; GISEL64-NEXT:    scratch_store_b32 off, v84, s33 offset:244
+; GISEL64-NEXT:    scratch_store_b32 off, v85, s33 offset:248
+; GISEL64-NEXT:    scratch_store_b32 off, v86, s33 offset:252
+; GISEL64-NEXT:    scratch_store_b32 off, v87, s33 offset:256
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_store_b32 off, v96, s33 offset:260
+; GISEL64-NEXT:    scratch_store_b32 off, v97, s33 offset:264
+; GISEL64-NEXT:    scratch_store_b32 off, v98, s33 offset:268
+; GISEL64-NEXT:    scratch_store_b32 off, v99, s33 offset:272
+; GISEL64-NEXT:    scratch_store_b32 off, v100, s33 offset:276
+; GISEL64-NEXT:    scratch_store_b32 off, v101, s33 offset:280
+; GISEL64-NEXT:    scratch_store_b32 off, v102, s33 offset:284
+; GISEL64-NEXT:    scratch_store_b32 off, v103, s33 offset:288
+; GISEL64-NEXT:    scratch_store_b32 off, v112, s33 offset:292
+; GISEL64-NEXT:    scratch_store_b32 off, v113, s33 offset:296
+; GISEL64-NEXT:    scratch_store_b32 off, v114, s33 offset:300
+; GISEL64-NEXT:    scratch_store_b32 off, v115, s33 offset:304
+; GISEL64-NEXT:    scratch_store_b32 off, v116, s33 offset:308
+; GISEL64-NEXT:    scratch_store_b32 off, v117, s33 offset:312
+; GISEL64-NEXT:    scratch_store_b32 off, v118, s33 offset:316
+; GISEL64-NEXT:    scratch_store_b32 off, v119, s33 offset:320
+; GISEL64-NEXT:    scratch_store_b32 off, v128, s33 offset:324
+; GISEL64-NEXT:    scratch_store_b32 off, v129, s33 offset:328
+; GISEL64-NEXT:    scratch_store_b32 off, v130, s33 offset:332
+; GISEL64-NEXT:    scratch_store_b32 off, v131, s33 offset:336
+; GISEL64-NEXT:    scratch_store_b32 off, v132, s33 offset:340
+; GISEL64-NEXT:    scratch_store_b32 off, v133, s33 offset:344
+; GISEL64-NEXT:    scratch_store_b32 off, v134, s33 offset:348
+; GISEL64-NEXT:    scratch_store_b32 off, v135, s33 offset:352
+; GISEL64-NEXT:    scratch_store_b32 off, v144, s33 offset:356
+; GISEL64-NEXT:    scratch_store_b32 off, v145, s33 offset:360
+; GISEL64-NEXT:    scratch_store_b32 off, v146, s33 offset:364
+; GISEL64-NEXT:    scratch_store_b32 off, v147, s33 offset:368
+; GISEL64-NEXT:    scratch_store_b32 off, v148, s33 offset:372
+; GISEL64-NEXT:    scratch_store_b32 off, v149, s33 offset:376
+; GISEL64-NEXT:    scratch_store_b32 off, v150, s33 offset:380
+; GISEL64-NEXT:    scratch_store_b32 off, v151, s33 offset:384
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_store_b32 off, v160, s33 offset:388
+; GISEL64-NEXT:    scratch_store_b32 off, v161, s33 offset:392
+; GISEL64-NEXT:    scratch_store_b32 off, v162, s33 offset:396
+; GISEL64-NEXT:    scratch_store_b32 off, v163, s33 offset:400
+; GISEL64-NEXT:    scratch_store_b32 off, v164, s33 offset:404
+; GISEL64-NEXT:    scratch_store_b32 off, v165, s33 offset:408
+; GISEL64-NEXT:    scratch_store_b32 off, v166, s33 offset:412
+; GISEL64-NEXT:    scratch_store_b32 off, v167, s33 offset:416
+; GISEL64-NEXT:    scratch_store_b32 off, v176, s33 offset:420
+; GISEL64-NEXT:    scratch_store_b32 off, v177, s33 offset:424
+; GISEL64-NEXT:    scratch_store_b32 off, v178, s33 offset:428
+; GISEL64-NEXT:    scratch_store_b32 off, v179, s33 offset:432
+; GISEL64-NEXT:    scratch_store_b32 off, v180, s33 offset:436
+; GISEL64-NEXT:    scratch_store_b32 off, v181, s33 offset:440
+; GISEL64-NEXT:    scratch_store_b32 off, v182, s33 offset:444
+; GISEL64-NEXT:    scratch_store_b32 off, v183, s33 offset:448
+; GISEL64-NEXT:    scratch_store_b32 off, v192, s33 offset:452
+; GISEL64-NEXT:    scratch_store_b32 off, v193, s33 offset:456
+; GISEL64-NEXT:    scratch_store_b32 off, v194, s33 offset:460
+; GISEL64-NEXT:    scratch_store_b32 off, v195, s33 offset:464
+; GISEL64-NEXT:    scratch_store_b32 off, v196, s33 offset:468
+; GISEL64-NEXT:    scratch_store_b32 off, v197, s33 offset:472
+; GISEL64-NEXT:    scratch_store_b32 off, v198, s33 offset:476
+; GISEL64-NEXT:    scratch_store_b32 off, v199, s33 offset:480
+; GISEL64-NEXT:    scratch_store_b32 off, v208, s33 offset:484
+; GISEL64-NEXT:    scratch_store_b32 off, v209, s33 offset:488
+; GISEL64-NEXT:    scratch_store_b32 off, v210, s33 offset:492
+; GISEL64-NEXT:    scratch_store_b32 off, v211, s33 offset:496
+; GISEL64-NEXT:    scratch_store_b32 off, v212, s33 offset:500
+; GISEL64-NEXT:    scratch_store_b32 off, v213, s33 offset:504
+; GISEL64-NEXT:    scratch_store_b32 off, v214, s33 offset:508
+; GISEL64-NEXT:    scratch_store_b32 off, v215, s33 offset:512
+; GISEL64-NEXT:    s_clause 0xf
+; GISEL64-NEXT:    scratch_store_b32 off, v224, s33 offset:516
+; GISEL64-NEXT:    scratch_store_b32 off, v225, s33 offset:520
+; GISEL64-NEXT:    scratch_store_b32 off, v226, s33 offset:524
+; GISEL64-NEXT:    scratch_store_b32 off, v227, s33 offset:528
+; GISEL64-NEXT:    scratch_store_b32 off, v228, s33 offset:532
+; GISEL64-NEXT:    scratch_store_b32 off, v229, s33 offset:536
+; GISEL64-NEXT:    scratch_store_b32 off, v230, s33 offset:540
+; GISEL64-NEXT:    scratch_store_b32 off, v231, s33 offset:544
+; GISEL64-NEXT:    scratch_store_b32 off, v240, s33 offset:548
+; GISEL64-NEXT:    scratch_store_b32 off, v241, s33 offset:552
+; GISEL64-NEXT:    scratch_store_b32 off, v242, s33 offset:556
+; GISEL64-NEXT:    scratch_store_b32 off, v243, s33 offset:560
+; GISEL64-NEXT:    scratch_store_b32 off, v244, s33 offset:564
+; GISEL64-NEXT:    scratch_store_b32 off, v245, s33 offset:568
+; GISEL64-NEXT:    scratch_store_b32 off, v246, s33 offset:572
+; GISEL64-NEXT:    scratch_store_b32 off, v247, s33 offset:576
+; GISEL64-NEXT:    s_mov_b64 exec, -1
+; GISEL64-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GISEL64-NEXT:    v_writelane_b32 v40, s4, 0
+; GISEL64-NEXT:    v_mov_b32_e32 v2, v0
+; GISEL64-NEXT:    v_swap_b32 v0, v1
+; GISEL64-NEXT:    s_mov_b32 s0, gfx_callee@abs32@lo
+; GISEL64-NEXT:    v_writelane_b32 v40, s5, 1
+; GISEL64-NEXT:    s_mov_b32 s1, gfx_callee@abs32@hi
+; GISEL64-NEXT:    s_addk_co_i32 s32, 0x250
+; GISEL64-NEXT:    v_writelane_b32 v40, s6, 2
+; GISEL64-NEXT:    v_writelane_b32 v40, s7, 3
+; GISEL64-NEXT:    v_writelane_b32 v40, s8, 4
+; GISEL64-NEXT:    v_writelane_b32 v40, s9, 5
+; GISEL64-NEXT:    s_mov_b64 s[8:9], 0
+; GISEL64-NEXT:    v_writelane_b32 v40, s10, 6
+; GISEL64-NEXT:    v_writelane_b32 v40, s11, 7
+; GISEL64-NEXT:    v_writelane_b32 v40, s12, 8
+; GISEL64-NEXT:    v_writelane_b32 v40, s13, 9
+; GISEL64-NEXT:    v_writelane_b32 v40, s14, 10
+; GISEL64-NEXT:    v_writelane_b32 v40, s15, 11
+; GISEL64-NEXT:    v_writelane_b32 v40, s16, 12
+; GISEL64-NEXT:    v_writelane_b32 v40, s17, 13
+; GISEL64-NEXT:    v_writelane_b32 v40, s18, 14
+; GISEL64-NEXT:    v_writelane_b32 v40, s19, 15
+; GISEL64-NEXT:    v_writelane_b32 v40, s20, 16
+; GISEL64-NEXT:    v_writelane_b32 v40, s21, 17
+; GISEL64-NEXT:    v_writelane_b32 v40, s22, 18
+; GISEL64-NEXT:    v_writelane_b32 v40, s23, 19
+; GISEL64-NEXT:    v_writelane_b32 v40, s24, 20
+; GISEL64-NEXT:    v_writelane_b32 v40, s25, 21
+; GISEL64-NEXT:    v_writelane_b32 v40, s26, 22
+; GISEL64-NEXT:    v_writelane_b32 v40, s27, 23
+; GISEL64-NEXT:    v_writelane_b32 v40, s28, 24
+; GISEL64-NEXT:    v_writelane_b32 v40, s29, 25
+; GISEL64-NEXT:    v_writelane_b32 v40, s30, 26
+; GISEL64-NEXT:    v_writelane_b32 v40, s31, 27
+; GISEL64-NEXT:    v_writelane_b32 v40, s72, 28
+; GISEL64-NEXT:    v_writelane_b32 v40, s73, 29
+; GISEL64-NEXT:    v_writelane_b32 v40, s74, 30
+; GISEL64-NEXT:    v_writelane_b32 v40, s75, 31
+; GISEL64-NEXT:    v_writelane_b32 v40, s76, 32
+; GISEL64-NEXT:    v_writelane_b32 v40, s77, 33
+; GISEL64-NEXT:    v_writelane_b32 v40, s78, 34
+; GISEL64-NEXT:    v_writelane_b32 v40, s79, 35
+; GISEL64-NEXT:    v_writelane_b32 v40, s88, 36
+; GISEL64-NEXT:    v_writelane_b32 v40, s89, 37
+; GISEL64-NEXT:    v_writelane_b32 v40, s90, 38
+; GISEL64-NEXT:    v_writelane_b32 v40, s91, 39
+; GISEL64-NEXT:    v_writelane_b32 v40, s92, 40
+; GISEL64-NEXT:    v_writelane_b32 v40, s93, 41
+; GISEL64-NEXT:    v_writelane_b32 v40, s94, 42
+; GISEL64-NEXT:    v_writelane_b32 v40, s95, 43
+; GISEL64-NEXT:    s_wait_alu 0xfffe
+; GISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; GISEL64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL64-NEXT:    v_readlane_b32 s95, v40, 43
+; GISEL64-NEXT:    v_readlane_b32 s94, v40, 42
+; GISEL64-NEXT:    v_readlane_b32 s93, v40, 41
+; GISEL64-NEXT:    v_readlane_b32 s92, v40, 40
+; GISEL64-NEXT:    v_readlane_b32 s91, v40, 39
+; GISEL64-NEXT:    v_readlane_b32 s90, v40, 38
+; GISEL64-NEXT:    v_readlane_b32 s89, v40, 37
+; GISEL64-NEXT:    v_readlane_b32 s88, v40, 36
+; GISEL64-NEXT:    v_readlane_b32 s79, v40, 35
+; GISEL64-NEXT:    v_readlane_b32 s78, v40, 34
+; GISEL64-NEXT:    v_readlane_b32 s77, v40, 33
+; GISEL64-NEXT:    v_readlane_b32 s76, v40, 32
+; GISEL64-NEXT:    v_readlane_b32 s75, v40, 31
+; GISEL64-NEXT:    v_readlane_b32 s74, v40, 30
+; GISEL64-NEXT:    v_readlane_b32 s73, v40, 29
+; GISEL64-NEXT:    v_readlane_b32 s72, v40, 28
+; GISEL64-NEXT:    v_readlane_b32 s31, v40, 27
+; GISEL64-NEXT:    v_readlane_b32 s30, v40, 26
+; GISEL64-NEXT:    v_readlane_b32 s29, v40, 25
+; GISEL64-NEXT:    v_readlane_b32 s28, v40, 24
+; GISEL64-NEXT:    v_readlane_b32 s27, v40, 23
+; GISEL64-NEXT:    v_readlane_b32 s26, v40, 22
+; GISEL64-NEXT:    v_readlane_b32 s25, v40, 21
+; GISEL64-NEXT:    v_readlane_b32 s24, v40, 20
+; GISEL64-NEXT:    v_readlane_b32 s23, v40, 19
+; GISEL64-NEXT:    v_readlane_b32 s22, v40, 18
+; GISEL64-NEXT:    v_readlane_b32 s21, v40, 17
+; GISEL64-NEXT:    v_readlane_b32 s20, v40, 16
+; GISEL64-NEXT:    v_readlane_b32 s19, v40, 15
+; GISEL64-NEXT:    v_readlane_b32 s18, v40, 14
+; GISEL64-NEXT:    v_readlane_b32 s17, v40, 13
+; GISEL64-NEXT:    v_readlane_b32 s16, v40, 12
+; GISEL64-NEXT:    v_readlane_b32 s15, v40, 11
+; GISEL64-NEXT:    v_readlane_b32 s14, v40, 10
+; GISEL64-NEXT:    v_readlane_b32 s13, v40, 9
+; GISEL64-NEXT:    v_readlane_b32 s12, v40, 8
+; GISEL64-NEXT:    v_readlane_b32 s11, v40, 7
+; GISEL64-NEXT:    v_readlane_b32 s10, v40, 6
+; GISEL64-NEXT:    v_readlane_b32 s9, v40, 5
+; GISEL64-NEXT:    v_readlane_b32 s8, v40, 4
+; GISEL64-NEXT:    v_readlane_b32 s7, v40, 3
+; GISEL64-NEXT:    v_readlane_b32 s6, v40, 2
+; GISEL64-NEXT:    v_readlane_b32 s5, v40, 1
+; GISEL64-NEXT:    v_readlane_b32 s4, v40, 0
+; GISEL64-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GISEL64-NEXT:    s_mov_b32 s32, s33
+; GISEL64-NEXT:    s_xor_b64 exec, s[34:35], -1
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_load_b32 v0, off, s33 offset:4
+; GISEL64-NEXT:    scratch_load_b32 v1, off, s33 offset:8
+; GISEL64-NEXT:    scratch_load_b32 v2, off, s33 offset:12
+; GISEL64-NEXT:    scratch_load_b32 v3, off, s33 offset:16
+; GISEL64-NEXT:    scratch_load_b32 v4, off, s33 offset:20
+; GISEL64-NEXT:    scratch_load_b32 v5, off, s33 offset:24
+; GISEL64-NEXT:    scratch_load_b32 v6, off, s33 offset:28
+; GISEL64-NEXT:    scratch_load_b32 v7, off, s33 offset:32
+; GISEL64-NEXT:    scratch_load_b32 v8, off, s33 offset:36
+; GISEL64-NEXT:    scratch_load_b32 v9, off, s33 offset:40
+; GISEL64-NEXT:    scratch_load_b32 v10, off, s33 offset:44
+; GISEL64-NEXT:    scratch_load_b32 v11, off, s33 offset:48
+; GISEL64-NEXT:    scratch_load_b32 v12, off, s33 offset:52
+; GISEL64-NEXT:    scratch_load_b32 v13, off, s33 offset:56
+; GISEL64-NEXT:    scratch_load_b32 v14, off, s33 offset:60
+; GISEL64-NEXT:    scratch_load_b32 v15, off, s33 offset:64
+; GISEL64-NEXT:    scratch_load_b32 v16, off, s33 offset:68
+; GISEL64-NEXT:    scratch_load_b32 v17, off, s33 offset:72
+; GISEL64-NEXT:    scratch_load_b32 v18, off, s33 offset:76
+; GISEL64-NEXT:    scratch_load_b32 v19, off, s33 offset:80
+; GISEL64-NEXT:    scratch_load_b32 v20, off, s33 offset:84
+; GISEL64-NEXT:    scratch_load_b32 v21, off, s33 offset:88
+; GISEL64-NEXT:    scratch_load_b32 v22, off, s33 offset:92
+; GISEL64-NEXT:    scratch_load_b32 v23, off, s33 offset:96
+; GISEL64-NEXT:    scratch_load_b32 v24, off, s33 offset:100
+; GISEL64-NEXT:    scratch_load_b32 v25, off, s33 offset:104
+; GISEL64-NEXT:    scratch_load_b32 v26, off, s33 offset:108
+; GISEL64-NEXT:    scratch_load_b32 v27, off, s33 offset:112
+; GISEL64-NEXT:    scratch_load_b32 v28, off, s33 offset:116
+; GISEL64-NEXT:    scratch_load_b32 v29, off, s33 offset:120
+; GISEL64-NEXT:    scratch_load_b32 v30, off, s33 offset:124
+; GISEL64-NEXT:    scratch_load_b32 v31, off, s33 offset:128
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_load_b32 v32, off, s33 offset:132
+; GISEL64-NEXT:    scratch_load_b32 v33, off, s33 offset:136
+; GISEL64-NEXT:    scratch_load_b32 v34, off, s33 offset:140
+; GISEL64-NEXT:    scratch_load_b32 v35, off, s33 offset:144
+; GISEL64-NEXT:    scratch_load_b32 v36, off, s33 offset:148
+; GISEL64-NEXT:    scratch_load_b32 v37, off, s33 offset:152
+; GISEL64-NEXT:    scratch_load_b32 v38, off, s33 offset:156
+; GISEL64-NEXT:    scratch_load_b32 v39, off, s33 offset:160
+; GISEL64-NEXT:    scratch_load_b32 v48, off, s33 offset:164
+; GISEL64-NEXT:    scratch_load_b32 v49, off, s33 offset:168
+; GISEL64-NEXT:    scratch_load_b32 v50, off, s33 offset:172
+; GISEL64-NEXT:    scratch_load_b32 v51, off, s33 offset:176
+; GISEL64-NEXT:    scratch_load_b32 v52, off, s33 offset:180
+; GISEL64-NEXT:    scratch_load_b32 v53, off, s33 offset:184
+; GISEL64-NEXT:    scratch_load_b32 v54, off, s33 offset:188
+; GISEL64-NEXT:    scratch_load_b32 v55, off, s33 offset:192
+; GISEL64-NEXT:    scratch_load_b32 v64, off, s33 offset:196
+; GISEL64-NEXT:    scratch_load_b32 v65, off, s33 offset:200
+; GISEL64-NEXT:    scratch_load_b32 v66, off, s33 offset:204
+; GISEL64-NEXT:    scratch_load_b32 v67, off, s33 offset:208
+; GISEL64-NEXT:    scratch_load_b32 v68, off, s33 offset:212
+; GISEL64-NEXT:    scratch_load_b32 v69, off, s33 offset:216
+; GISEL64-NEXT:    scratch_load_b32 v70, off, s33 offset:220
+; GISEL64-NEXT:    scratch_load_b32 v71, off, s33 offset:224
+; GISEL64-NEXT:    scratch_load_b32 v80, off, s33 offset:228
+; GISEL64-NEXT:    scratch_load_b32 v81, off, s33 offset:232
+; GISEL64-NEXT:    scratch_load_b32 v82, off, s33 offset:236
+; GISEL64-NEXT:    scratch_load_b32 v83, off, s33 offset:240
+; GISEL64-NEXT:    scratch_load_b32 v84, off, s33 offset:244
+; GISEL64-NEXT:    scratch_load_b32 v85, off, s33 offset:248
+; GISEL64-NEXT:    scratch_load_b32 v86, off, s33 offset:252
+; GISEL64-NEXT:    scratch_load_b32 v87, off, s33 offset:256
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_load_b32 v96, off, s33 offset:260
+; GISEL64-NEXT:    scratch_load_b32 v97, off, s33 offset:264
+; GISEL64-NEXT:    scratch_load_b32 v98, off, s33 offset:268
+; GISEL64-NEXT:    scratch_load_b32 v99, off, s33 offset:272
+; GISEL64-NEXT:    scratch_load_b32 v100, off, s33 offset:276
+; GISEL64-NEXT:    scratch_load_b32 v101, off, s33 offset:280
+; GISEL64-NEXT:    scratch_load_b32 v102, off, s33 offset:284
+; GISEL64-NEXT:    scratch_load_b32 v103, off, s33 offset:288
+; GISEL64-NEXT:    scratch_load_b32 v112, off, s33 offset:292
+; GISEL64-NEXT:    scratch_load_b32 v113, off, s33 offset:296
+; GISEL64-NEXT:    scratch_load_b32 v114, off, s33 offset:300
+; GISEL64-NEXT:    scratch_load_b32 v115, off, s33 offset:304
+; GISEL64-NEXT:    scratch_load_b32 v116, off, s33 offset:308
+; GISEL64-NEXT:    scratch_load_b32 v117, off, s33 offset:312
+; GISEL64-NEXT:    scratch_load_b32 v118, off, s33 offset:316
+; GISEL64-NEXT:    scratch_load_b32 v119, off, s33 offset:320
+; GISEL64-NEXT:    scratch_load_b32 v128, off, s33 offset:324
+; GISEL64-NEXT:    scratch_load_b32 v129, off, s33 offset:328
+; GISEL64-NEXT:    scratch_load_b32 v130, off, s33 offset:332
+; GISEL64-NEXT:    scratch_load_b32 v131, off, s33 offset:336
+; GISEL64-NEXT:    scratch_load_b32 v132, off, s33 offset:340
+; GISEL64-NEXT:    scratch_load_b32 v133, off, s33 offset:344
+; GISEL64-NEXT:    scratch_load_b32 v134, off, s33 offset:348
+; GISEL64-NEXT:    scratch_load_b32 v135, off, s33 offset:352
+; GISEL64-NEXT:    scratch_load_b32 v144, off, s33 offset:356
+; GISEL64-NEXT:    scratch_load_b32 v145, off, s33 offset:360
+; GISEL64-NEXT:    scratch_load_b32 v146, off, s33 offset:364
+; GISEL64-NEXT:    scratch_load_b32 v147, off, s33 offset:368
+; GISEL64-NEXT:    scratch_load_b32 v148, off, s33 offset:372
+; GISEL64-NEXT:    scratch_load_b32 v149, off, s33 offset:376
+; GISEL64-NEXT:    scratch_load_b32 v150, off, s33 offset:380
+; GISEL64-NEXT:    scratch_load_b32 v151, off, s33 offset:384
+; GISEL64-NEXT:    s_clause 0x1f
+; GISEL64-NEXT:    scratch_load_b32 v160, off, s33 offset:388
+; GISEL64-NEXT:    scratch_load_b32 v161, off, s33 offset:392
+; GISEL64-NEXT:    scratch_load_b32 v162, off, s33 offset:396
+; GISEL64-NEXT:    scratch_load_b32 v163, off, s33 offset:400
+; GISEL64-NEXT:    scratch_load_b32 v164, off, s33 offset:404
+; GISEL64-NEXT:    scratch_load_b32 v165, off, s33 offset:408
+; GISEL64-NEXT:    scratch_load_b32 v166, off, s33 offset:412
+; GISEL64-NEXT:    scratch_load_b32 v167, off, s33 offset:416
+; GISEL64-NEXT:    scratch_load_b32 v176, off, s33 offset:420
+; GISEL64-NEXT:    scratch_load_b32 v177, off, s33 offset:424
+; GISEL64-NEXT:    scratch_load_b32 v178, off, s33 offset:428
+; GISEL64-NEXT:    scratch_load_b32 v179, off, s33 offset:432
+; GISEL64-NEXT:    scratch_load_b32 v180, off, s33 offset:436
+; GISEL64-NEXT:    scratch_load_b32 v181, off, s33 offset:440
+; GISEL64-NEXT:    scratch_load_b32 v182, off, s33 offset:444
+; GISEL64-NEXT:    scratch_load_b32 v183, off, s33 offset:448
+; GISEL64-NEXT:    scratch_load_b32 v192, off, s33 offset:452
+; GISEL64-NEXT:    scratch_load_b32 v193, off, s33 offset:456
+; GISEL64-NEXT:    scratch_load_b32 v194, off, s33 offset:460
+; GISEL64-NEXT:    scratch_load_b32 v195, off, s33 offset:464
+; GISEL64-NEXT:    scratch_load_b32 v196, off, s33 offset:468
+; GISEL64-NEXT:    scratch_load_b32 v197, off, s33 offset:472
+; GISEL64-NEXT:    scratch_load_b32 v198, off, s33 offset:476
+; GISEL64-NEXT:    scratch_load_b32 v199, off, s33 offset:480
+; GISEL64-NEXT:    scratch_load_b32 v208, off, s33 offset:484
+; GISEL64-NEXT:    scratch_load_b32 v209, off, s33 offset:488
+; GISEL64-NEXT:    scratch_load_b32 v210, off, s33 offset:492
+; GISEL64-NEXT:    scratch_load_b32 v211, off, s33 offset:496
+; GISEL64-NEXT:    scratch_load_b32 v212, off, s33 offset:500
+; GISEL64-NEXT:    scratch_load_b32 v213, off, s33 offset:504
+; GISEL64-NEXT:    scratch_load_b32 v214, off, s33 offset:508
+; GISEL64-NEXT:    scratch_load_b32 v215, off, s33 offset:512
+; GISEL64-NEXT:    s_clause 0xf
+; GISEL64-NEXT:    scratch_load_b32 v224, off, s33 offset:516
+; GISEL64-NEXT:    scratch_load_b32 v225, off, s33 offset:520
+; GISEL64-NEXT:    scratch_load_b32 v226, off, s33 offset:524
+; GISEL64-NEXT:    scratch_load_b32 v227, off, s33 offset:528
+; GISEL64-NEXT:    scratch_load_b32 v228, off, s33 offset:532
+; GISEL64-NEXT:    scratch_load_b32 v229, off, s33 offset:536
+; GISEL64-NEXT:    scratch_load_b32 v230, off, s33 offset:540
+; GISEL64-NEXT:    scratch_load_b32 v231, off, s33 offset:544
+; GISEL64-NEXT:    scratch_load_b32 v240, off, s33 offset:548
+; GISEL64-NEXT:    scratch_load_b32 v241, off, s33 offset:552
+; GISEL64-NEXT:    scratch_load_b32 v242, off, s33 offset:556
+; GISEL64-NEXT:    scratch_load_b32 v243, off, s33 offset:560
+; GISEL64-NEXT:    scratch_load_b32 v244, off, s33 offset:564
+; GISEL64-NEXT:    scratch_load_b32 v245, off, s33 offset:568
+; GISEL64-NEXT:    scratch_load_b32 v246, off, s33 offset:572
+; GISEL64-NEXT:    scratch_load_b32 v247, off, s33 offset:576
+; GISEL64-NEXT:    s_mov_b64 exec, s[34:35]
+; GISEL64-NEXT:    s_mov_b32 s33, s36
+; GISEL64-NEXT:    s_wait_loadcnt 0x0
+; GISEL64-NEXT:    s_wait_alu 0xfffe
+; GISEL64-NEXT:    s_setpc_b64 s[30:31]
+  %ret = tail call <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
+  ret <2 x half> %ret
+}
+
 declare amdgpu_gfx_whole_wave float @callee(i1 %active, <8 x float> %x)
 
 define amdgpu_cs void @call_from_entry(<8 x float> %x, ptr %p) {

>From 7b68ddf3868934113521002cc0edeea39565fc8c Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Fri, 27 Jun 2025 13:45:14 +0200
Subject: [PATCH 2/2] Fix CC in test

---
 .../CodeGen/AMDGPU/whole-wave-functions.ll    | 2760 +++++++----------
 1 file changed, 1177 insertions(+), 1583 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index c478f6f3c8dfb..9ca57c6a2a246 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -2422,417 +2422,314 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
 ; DAGISEL-NEXT:    s_wait_samplecnt 0x0
 ; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
 ; DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL-NEXT:    s_mov_b32 s35, s33
-; DAGISEL-NEXT:    s_mov_b32 s33, s32
-; DAGISEL-NEXT:    s_xor_saveexec_b32 s34, -1
+; DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:8
-; DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:12
-; DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:16
-; DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:20
-; DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:24
-; DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:28
-; DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:32
-; DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:36
-; DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:40
-; DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:44
-; DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:48
-; DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:52
-; DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:56
-; DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:60
-; DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:64
-; DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:68
-; DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:72
-; DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:76
-; DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:80
-; DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:84
-; DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:88
-; DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:92
-; DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:96
-; DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:100
-; DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:104
-; DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:108
-; DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:112
-; DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:116
-; DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:120
-; DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:124
-; DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:128
-; DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:132
+; DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
+; DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
+; DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
+; DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; DAGISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24
+; DAGISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28
+; DAGISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32
+; DAGISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36
+; DAGISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40
+; DAGISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44
+; DAGISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48
+; DAGISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52
+; DAGISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56
+; DAGISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60
+; DAGISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64
+; DAGISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68
+; DAGISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72
+; DAGISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76
+; DAGISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80
+; DAGISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84
+; DAGISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88
+; DAGISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92
+; DAGISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96
+; DAGISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100
+; DAGISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104
+; DAGISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108
+; DAGISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112
+; DAGISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116
+; DAGISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120
+; DAGISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:136
-; DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:140
-; DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:144
-; DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:148
-; DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:152
-; DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:156
-; DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:160
-; DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:164
-; DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:168
-; DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:172
-; DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:176
-; DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:180
-; DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:184
-; DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:188
-; DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:192
-; DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:196
-; DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:200
-; DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:204
-; DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:208
-; DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:212
-; DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:216
-; DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:220
-; DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:224
-; DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:228
-; DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:232
-; DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:236
-; DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:240
-; DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:244
-; DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:248
-; DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:252
-; DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:256
-; DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:260
+; DAGISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128
+; DAGISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132
+; DAGISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136
+; DAGISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140
+; DAGISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144
+; DAGISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148
+; DAGISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152
+; DAGISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156
+; DAGISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160
+; DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164
+; DAGISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168
+; DAGISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172
+; DAGISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176
+; DAGISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180
+; DAGISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184
+; DAGISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188
+; DAGISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192
+; DAGISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196
+; DAGISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200
+; DAGISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204
+; DAGISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208
+; DAGISEL-NEXT:    scratch_store_b32 off, v69, s32 offset:212
+; DAGISEL-NEXT:    scratch_store_b32 off, v70, s32 offset:216
+; DAGISEL-NEXT:    scratch_store_b32 off, v71, s32 offset:220
+; DAGISEL-NEXT:    scratch_store_b32 off, v80, s32 offset:224
+; DAGISEL-NEXT:    scratch_store_b32 off, v81, s32 offset:228
+; DAGISEL-NEXT:    scratch_store_b32 off, v82, s32 offset:232
+; DAGISEL-NEXT:    scratch_store_b32 off, v83, s32 offset:236
+; DAGISEL-NEXT:    scratch_store_b32 off, v84, s32 offset:240
+; DAGISEL-NEXT:    scratch_store_b32 off, v85, s32 offset:244
+; DAGISEL-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; DAGISEL-NEXT:    scratch_store_b32 off, v87, s32 offset:252
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:264
-; DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:268
-; DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:272
-; DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:276
-; DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:280
-; DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:284
-; DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:288
-; DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:292
-; DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:296
-; DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:300
-; DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:304
-; DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:308
-; DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:312
-; DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:316
-; DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:320
-; DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:324
-; DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:328
-; DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:332
-; DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:336
-; DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:340
-; DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:344
-; DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:348
-; DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:352
-; DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:356
-; DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:360
-; DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:364
-; DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:368
-; DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:372
-; DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:376
-; DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:380
-; DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:384
-; DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:388
+; DAGISEL-NEXT:    scratch_store_b32 off, v96, s32 offset:256
+; DAGISEL-NEXT:    scratch_store_b32 off, v97, s32 offset:260
+; DAGISEL-NEXT:    scratch_store_b32 off, v98, s32 offset:264
+; DAGISEL-NEXT:    scratch_store_b32 off, v99, s32 offset:268
+; DAGISEL-NEXT:    scratch_store_b32 off, v100, s32 offset:272
+; DAGISEL-NEXT:    scratch_store_b32 off, v101, s32 offset:276
+; DAGISEL-NEXT:    scratch_store_b32 off, v102, s32 offset:280
+; DAGISEL-NEXT:    scratch_store_b32 off, v103, s32 offset:284
+; DAGISEL-NEXT:    scratch_store_b32 off, v112, s32 offset:288
+; DAGISEL-NEXT:    scratch_store_b32 off, v113, s32 offset:292
+; DAGISEL-NEXT:    scratch_store_b32 off, v114, s32 offset:296
+; DAGISEL-NEXT:    scratch_store_b32 off, v115, s32 offset:300
+; DAGISEL-NEXT:    scratch_store_b32 off, v116, s32 offset:304
+; DAGISEL-NEXT:    scratch_store_b32 off, v117, s32 offset:308
+; DAGISEL-NEXT:    scratch_store_b32 off, v118, s32 offset:312
+; DAGISEL-NEXT:    scratch_store_b32 off, v119, s32 offset:316
+; DAGISEL-NEXT:    scratch_store_b32 off, v128, s32 offset:320
+; DAGISEL-NEXT:    scratch_store_b32 off, v129, s32 offset:324
+; DAGISEL-NEXT:    scratch_store_b32 off, v130, s32 offset:328
+; DAGISEL-NEXT:    scratch_store_b32 off, v131, s32 offset:332
+; DAGISEL-NEXT:    scratch_store_b32 off, v132, s32 offset:336
+; DAGISEL-NEXT:    scratch_store_b32 off, v133, s32 offset:340
+; DAGISEL-NEXT:    scratch_store_b32 off, v134, s32 offset:344
+; DAGISEL-NEXT:    scratch_store_b32 off, v135, s32 offset:348
+; DAGISEL-NEXT:    scratch_store_b32 off, v144, s32 offset:352
+; DAGISEL-NEXT:    scratch_store_b32 off, v145, s32 offset:356
+; DAGISEL-NEXT:    scratch_store_b32 off, v146, s32 offset:360
+; DAGISEL-NEXT:    scratch_store_b32 off, v147, s32 offset:364
+; DAGISEL-NEXT:    scratch_store_b32 off, v148, s32 offset:368
+; DAGISEL-NEXT:    scratch_store_b32 off, v149, s32 offset:372
+; DAGISEL-NEXT:    scratch_store_b32 off, v150, s32 offset:376
+; DAGISEL-NEXT:    scratch_store_b32 off, v151, s32 offset:380
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:392
-; DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:396
-; DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:400
-; DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:404
-; DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:408
-; DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:412
-; DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:416
-; DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:420
-; DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:424
-; DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:428
-; DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:432
-; DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:436
-; DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:440
-; DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:444
-; DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:448
-; DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:452
-; DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:456
-; DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:460
-; DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:464
-; DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:468
-; DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:472
-; DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:476
-; DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:480
-; DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:484
-; DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:488
-; DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:492
-; DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:496
-; DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:500
-; DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:504
-; DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:508
-; DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:512
-; DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:516
+; DAGISEL-NEXT:    scratch_store_b32 off, v160, s32 offset:384
+; DAGISEL-NEXT:    scratch_store_b32 off, v161, s32 offset:388
+; DAGISEL-NEXT:    scratch_store_b32 off, v162, s32 offset:392
+; DAGISEL-NEXT:    scratch_store_b32 off, v163, s32 offset:396
+; DAGISEL-NEXT:    scratch_store_b32 off, v164, s32 offset:400
+; DAGISEL-NEXT:    scratch_store_b32 off, v165, s32 offset:404
+; DAGISEL-NEXT:    scratch_store_b32 off, v166, s32 offset:408
+; DAGISEL-NEXT:    scratch_store_b32 off, v167, s32 offset:412
+; DAGISEL-NEXT:    scratch_store_b32 off, v176, s32 offset:416
+; DAGISEL-NEXT:    scratch_store_b32 off, v177, s32 offset:420
+; DAGISEL-NEXT:    scratch_store_b32 off, v178, s32 offset:424
+; DAGISEL-NEXT:    scratch_store_b32 off, v179, s32 offset:428
+; DAGISEL-NEXT:    scratch_store_b32 off, v180, s32 offset:432
+; DAGISEL-NEXT:    scratch_store_b32 off, v181, s32 offset:436
+; DAGISEL-NEXT:    scratch_store_b32 off, v182, s32 offset:440
+; DAGISEL-NEXT:    scratch_store_b32 off, v183, s32 offset:444
+; DAGISEL-NEXT:    scratch_store_b32 off, v192, s32 offset:448
+; DAGISEL-NEXT:    scratch_store_b32 off, v193, s32 offset:452
+; DAGISEL-NEXT:    scratch_store_b32 off, v194, s32 offset:456
+; DAGISEL-NEXT:    scratch_store_b32 off, v195, s32 offset:460
+; DAGISEL-NEXT:    scratch_store_b32 off, v196, s32 offset:464
+; DAGISEL-NEXT:    scratch_store_b32 off, v197, s32 offset:468
+; DAGISEL-NEXT:    scratch_store_b32 off, v198, s32 offset:472
+; DAGISEL-NEXT:    scratch_store_b32 off, v199, s32 offset:476
+; DAGISEL-NEXT:    scratch_store_b32 off, v208, s32 offset:480
+; DAGISEL-NEXT:    scratch_store_b32 off, v209, s32 offset:484
+; DAGISEL-NEXT:    scratch_store_b32 off, v210, s32 offset:488
+; DAGISEL-NEXT:    scratch_store_b32 off, v211, s32 offset:492
+; DAGISEL-NEXT:    scratch_store_b32 off, v212, s32 offset:496
+; DAGISEL-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; DAGISEL-NEXT:    scratch_store_b32 off, v214, s32 offset:504
+; DAGISEL-NEXT:    scratch_store_b32 off, v215, s32 offset:508
 ; DAGISEL-NEXT:    s_clause 0xf
-; DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:520
-; DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:524
-; DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:528
-; DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:532
-; DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:536
-; DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:540
-; DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:544
-; DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:548
-; DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:552
-; DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:556
-; DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:560
-; DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:564
-; DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:568
-; DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:572
-; DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:576
-; DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:580
+; DAGISEL-NEXT:    scratch_store_b32 off, v224, s32 offset:512
+; DAGISEL-NEXT:    scratch_store_b32 off, v225, s32 offset:516
+; DAGISEL-NEXT:    scratch_store_b32 off, v226, s32 offset:520
+; DAGISEL-NEXT:    scratch_store_b32 off, v227, s32 offset:524
+; DAGISEL-NEXT:    scratch_store_b32 off, v228, s32 offset:528
+; DAGISEL-NEXT:    scratch_store_b32 off, v229, s32 offset:532
+; DAGISEL-NEXT:    scratch_store_b32 off, v230, s32 offset:536
+; DAGISEL-NEXT:    scratch_store_b32 off, v231, s32 offset:540
+; DAGISEL-NEXT:    scratch_store_b32 off, v240, s32 offset:544
+; DAGISEL-NEXT:    scratch_store_b32 off, v241, s32 offset:548
+; DAGISEL-NEXT:    scratch_store_b32 off, v242, s32 offset:552
+; DAGISEL-NEXT:    scratch_store_b32 off, v243, s32 offset:556
+; DAGISEL-NEXT:    scratch_store_b32 off, v244, s32 offset:560
+; DAGISEL-NEXT:    scratch_store_b32 off, v245, s32 offset:564
+; DAGISEL-NEXT:    scratch_store_b32 off, v246, s32 offset:568
+; DAGISEL-NEXT:    scratch_store_b32 off, v247, s32 offset:572
 ; DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
-; DAGISEL-NEXT:    s_clause 0x1
-; DAGISEL-NEXT:    scratch_store_b32 off, v40, s33
-; DAGISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:4
-; DAGISEL-NEXT:    v_writelane_b32 v40, s4, 0
-; DAGISEL-NEXT:    v_writelane_b32 v41, s76, 0
 ; DAGISEL-NEXT:    v_mov_b32_e32 v2, v0
+; DAGISEL-NEXT:    s_mov_b32 s37, gfx_callee at abs32@hi
+; DAGISEL-NEXT:    s_mov_b32 s36, gfx_callee at abs32@lo
 ; DAGISEL-NEXT:    v_swap_b32 v0, v1
-; DAGISEL-NEXT:    v_writelane_b32 v40, s5, 1
-; DAGISEL-NEXT:    v_writelane_b32 v41, s77, 1
-; DAGISEL-NEXT:    s_mov_b32 s1, gfx_callee at abs32@hi
-; DAGISEL-NEXT:    s_mov_b32 s0, gfx_callee at abs32@lo
-; DAGISEL-NEXT:    s_addk_co_i32 s32, 0x250
-; DAGISEL-NEXT:    v_writelane_b32 v40, s6, 2
-; DAGISEL-NEXT:    v_writelane_b32 v41, s78, 2
-; DAGISEL-NEXT:    v_writelane_b32 v40, s7, 3
-; DAGISEL-NEXT:    v_writelane_b32 v41, s79, 3
-; DAGISEL-NEXT:    v_writelane_b32 v40, s8, 4
-; DAGISEL-NEXT:    v_writelane_b32 v41, s88, 4
-; DAGISEL-NEXT:    v_writelane_b32 v40, s9, 5
-; DAGISEL-NEXT:    v_writelane_b32 v41, s89, 5
-; DAGISEL-NEXT:    s_mov_b64 s[8:9], 0
-; DAGISEL-NEXT:    v_writelane_b32 v40, s10, 6
-; DAGISEL-NEXT:    v_writelane_b32 v41, s90, 6
-; DAGISEL-NEXT:    v_writelane_b32 v40, s11, 7
-; DAGISEL-NEXT:    v_writelane_b32 v41, s91, 7
-; DAGISEL-NEXT:    v_writelane_b32 v40, s12, 8
-; DAGISEL-NEXT:    v_writelane_b32 v41, s92, 8
-; DAGISEL-NEXT:    v_writelane_b32 v40, s13, 9
-; DAGISEL-NEXT:    v_writelane_b32 v41, s93, 9
-; DAGISEL-NEXT:    v_writelane_b32 v40, s14, 10
-; DAGISEL-NEXT:    v_writelane_b32 v41, s94, 10
-; DAGISEL-NEXT:    v_writelane_b32 v40, s15, 11
-; DAGISEL-NEXT:    v_writelane_b32 v41, s95, 11
-; DAGISEL-NEXT:    v_writelane_b32 v40, s16, 12
-; DAGISEL-NEXT:    v_writelane_b32 v40, s17, 13
-; DAGISEL-NEXT:    v_writelane_b32 v40, s18, 14
-; DAGISEL-NEXT:    v_writelane_b32 v40, s19, 15
-; DAGISEL-NEXT:    v_writelane_b32 v40, s20, 16
-; DAGISEL-NEXT:    v_writelane_b32 v40, s21, 17
-; DAGISEL-NEXT:    v_writelane_b32 v40, s22, 18
-; DAGISEL-NEXT:    v_writelane_b32 v40, s23, 19
-; DAGISEL-NEXT:    v_writelane_b32 v40, s24, 20
-; DAGISEL-NEXT:    v_writelane_b32 v40, s25, 21
-; DAGISEL-NEXT:    v_writelane_b32 v40, s26, 22
-; DAGISEL-NEXT:    v_writelane_b32 v40, s27, 23
-; DAGISEL-NEXT:    v_writelane_b32 v40, s28, 24
-; DAGISEL-NEXT:    v_writelane_b32 v40, s29, 25
-; DAGISEL-NEXT:    v_writelane_b32 v40, s30, 26
-; DAGISEL-NEXT:    v_writelane_b32 v40, s31, 27
-; DAGISEL-NEXT:    v_writelane_b32 v40, s72, 28
-; DAGISEL-NEXT:    v_writelane_b32 v40, s73, 29
-; DAGISEL-NEXT:    v_writelane_b32 v40, s74, 30
-; DAGISEL-NEXT:    v_writelane_b32 v40, s75, 31
 ; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL-NEXT:    v_readlane_b32 s95, v41, 11
-; DAGISEL-NEXT:    v_readlane_b32 s94, v41, 10
-; DAGISEL-NEXT:    v_readlane_b32 s93, v41, 9
-; DAGISEL-NEXT:    v_readlane_b32 s92, v41, 8
-; DAGISEL-NEXT:    v_readlane_b32 s91, v41, 7
-; DAGISEL-NEXT:    v_readlane_b32 s90, v41, 6
-; DAGISEL-NEXT:    v_readlane_b32 s89, v41, 5
-; DAGISEL-NEXT:    v_readlane_b32 s88, v41, 4
-; DAGISEL-NEXT:    v_readlane_b32 s79, v41, 3
-; DAGISEL-NEXT:    v_readlane_b32 s78, v41, 2
-; DAGISEL-NEXT:    v_readlane_b32 s77, v41, 1
-; DAGISEL-NEXT:    v_readlane_b32 s76, v41, 0
-; DAGISEL-NEXT:    v_readlane_b32 s75, v40, 31
-; DAGISEL-NEXT:    v_readlane_b32 s74, v40, 30
-; DAGISEL-NEXT:    v_readlane_b32 s73, v40, 29
-; DAGISEL-NEXT:    v_readlane_b32 s72, v40, 28
-; DAGISEL-NEXT:    v_readlane_b32 s31, v40, 27
-; DAGISEL-NEXT:    v_readlane_b32 s30, v40, 26
-; DAGISEL-NEXT:    v_readlane_b32 s29, v40, 25
-; DAGISEL-NEXT:    v_readlane_b32 s28, v40, 24
-; DAGISEL-NEXT:    v_readlane_b32 s27, v40, 23
-; DAGISEL-NEXT:    v_readlane_b32 s26, v40, 22
-; DAGISEL-NEXT:    v_readlane_b32 s25, v40, 21
-; DAGISEL-NEXT:    v_readlane_b32 s24, v40, 20
-; DAGISEL-NEXT:    v_readlane_b32 s23, v40, 19
-; DAGISEL-NEXT:    v_readlane_b32 s22, v40, 18
-; DAGISEL-NEXT:    v_readlane_b32 s21, v40, 17
-; DAGISEL-NEXT:    v_readlane_b32 s20, v40, 16
-; DAGISEL-NEXT:    v_readlane_b32 s19, v40, 15
-; DAGISEL-NEXT:    v_readlane_b32 s18, v40, 14
-; DAGISEL-NEXT:    v_readlane_b32 s17, v40, 13
-; DAGISEL-NEXT:    v_readlane_b32 s16, v40, 12
-; DAGISEL-NEXT:    v_readlane_b32 s15, v40, 11
-; DAGISEL-NEXT:    v_readlane_b32 s14, v40, 10
-; DAGISEL-NEXT:    v_readlane_b32 s13, v40, 9
-; DAGISEL-NEXT:    v_readlane_b32 s12, v40, 8
-; DAGISEL-NEXT:    v_readlane_b32 s11, v40, 7
-; DAGISEL-NEXT:    v_readlane_b32 s10, v40, 6
-; DAGISEL-NEXT:    v_readlane_b32 s9, v40, 5
-; DAGISEL-NEXT:    v_readlane_b32 s8, v40, 4
-; DAGISEL-NEXT:    v_readlane_b32 s7, v40, 3
-; DAGISEL-NEXT:    v_readlane_b32 s6, v40, 2
-; DAGISEL-NEXT:    v_readlane_b32 s5, v40, 1
-; DAGISEL-NEXT:    v_readlane_b32 s4, v40, 0
-; DAGISEL-NEXT:    s_clause 0x1
-; DAGISEL-NEXT:    scratch_load_b32 v40, off, s33
-; DAGISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:4
-; DAGISEL-NEXT:    s_mov_b32 s32, s33
-; DAGISEL-NEXT:    s_xor_b32 exec_lo, s34, -1
+; DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:8
-; DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:12
-; DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:16
-; DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:20
-; DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:24
-; DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:28
-; DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:32
-; DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:36
-; DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:40
-; DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:44
-; DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:48
-; DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:52
-; DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:56
-; DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:60
-; DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:64
-; DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:68
-; DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:72
-; DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:76
-; DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:80
-; DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:84
-; DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:88
-; DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:92
-; DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:96
-; DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:100
-; DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:104
-; DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:108
-; DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:112
-; DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:116
-; DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:120
-; DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:124
-; DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:128
-; DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:132
+; DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
+; DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; DAGISEL-NEXT:    scratch_load_b32 v6, off, s32 offset:24
+; DAGISEL-NEXT:    scratch_load_b32 v7, off, s32 offset:28
+; DAGISEL-NEXT:    scratch_load_b32 v8, off, s32 offset:32
+; DAGISEL-NEXT:    scratch_load_b32 v9, off, s32 offset:36
+; DAGISEL-NEXT:    scratch_load_b32 v10, off, s32 offset:40
+; DAGISEL-NEXT:    scratch_load_b32 v11, off, s32 offset:44
+; DAGISEL-NEXT:    scratch_load_b32 v12, off, s32 offset:48
+; DAGISEL-NEXT:    scratch_load_b32 v13, off, s32 offset:52
+; DAGISEL-NEXT:    scratch_load_b32 v14, off, s32 offset:56
+; DAGISEL-NEXT:    scratch_load_b32 v15, off, s32 offset:60
+; DAGISEL-NEXT:    scratch_load_b32 v16, off, s32 offset:64
+; DAGISEL-NEXT:    scratch_load_b32 v17, off, s32 offset:68
+; DAGISEL-NEXT:    scratch_load_b32 v18, off, s32 offset:72
+; DAGISEL-NEXT:    scratch_load_b32 v19, off, s32 offset:76
+; DAGISEL-NEXT:    scratch_load_b32 v20, off, s32 offset:80
+; DAGISEL-NEXT:    scratch_load_b32 v21, off, s32 offset:84
+; DAGISEL-NEXT:    scratch_load_b32 v22, off, s32 offset:88
+; DAGISEL-NEXT:    scratch_load_b32 v23, off, s32 offset:92
+; DAGISEL-NEXT:    scratch_load_b32 v24, off, s32 offset:96
+; DAGISEL-NEXT:    scratch_load_b32 v25, off, s32 offset:100
+; DAGISEL-NEXT:    scratch_load_b32 v26, off, s32 offset:104
+; DAGISEL-NEXT:    scratch_load_b32 v27, off, s32 offset:108
+; DAGISEL-NEXT:    scratch_load_b32 v28, off, s32 offset:112
+; DAGISEL-NEXT:    scratch_load_b32 v29, off, s32 offset:116
+; DAGISEL-NEXT:    scratch_load_b32 v30, off, s32 offset:120
+; DAGISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:124
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:136
-; DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:140
-; DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:144
-; DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:148
-; DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:152
-; DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:156
-; DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:160
-; DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:164
-; DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:168
-; DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:172
-; DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:176
-; DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:180
-; DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:184
-; DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:188
-; DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:192
-; DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:196
-; DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:200
-; DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:204
-; DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:208
-; DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:212
-; DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:216
-; DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:220
-; DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:224
-; DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:228
-; DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:232
-; DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:236
-; DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:240
-; DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:244
-; DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:248
-; DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:252
-; DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:256
-; DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:260
+; DAGISEL-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; DAGISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:132
+; DAGISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:136
+; DAGISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:140
+; DAGISEL-NEXT:    scratch_load_b32 v36, off, s32 offset:144
+; DAGISEL-NEXT:    scratch_load_b32 v37, off, s32 offset:148
+; DAGISEL-NEXT:    scratch_load_b32 v38, off, s32 offset:152
+; DAGISEL-NEXT:    scratch_load_b32 v39, off, s32 offset:156
+; DAGISEL-NEXT:    scratch_load_b32 v48, off, s32 offset:160
+; DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:164
+; DAGISEL-NEXT:    scratch_load_b32 v50, off, s32 offset:168
+; DAGISEL-NEXT:    scratch_load_b32 v51, off, s32 offset:172
+; DAGISEL-NEXT:    scratch_load_b32 v52, off, s32 offset:176
+; DAGISEL-NEXT:    scratch_load_b32 v53, off, s32 offset:180
+; DAGISEL-NEXT:    scratch_load_b32 v54, off, s32 offset:184
+; DAGISEL-NEXT:    scratch_load_b32 v55, off, s32 offset:188
+; DAGISEL-NEXT:    scratch_load_b32 v64, off, s32 offset:192
+; DAGISEL-NEXT:    scratch_load_b32 v65, off, s32 offset:196
+; DAGISEL-NEXT:    scratch_load_b32 v66, off, s32 offset:200
+; DAGISEL-NEXT:    scratch_load_b32 v67, off, s32 offset:204
+; DAGISEL-NEXT:    scratch_load_b32 v68, off, s32 offset:208
+; DAGISEL-NEXT:    scratch_load_b32 v69, off, s32 offset:212
+; DAGISEL-NEXT:    scratch_load_b32 v70, off, s32 offset:216
+; DAGISEL-NEXT:    scratch_load_b32 v71, off, s32 offset:220
+; DAGISEL-NEXT:    scratch_load_b32 v80, off, s32 offset:224
+; DAGISEL-NEXT:    scratch_load_b32 v81, off, s32 offset:228
+; DAGISEL-NEXT:    scratch_load_b32 v82, off, s32 offset:232
+; DAGISEL-NEXT:    scratch_load_b32 v83, off, s32 offset:236
+; DAGISEL-NEXT:    scratch_load_b32 v84, off, s32 offset:240
+; DAGISEL-NEXT:    scratch_load_b32 v85, off, s32 offset:244
+; DAGISEL-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; DAGISEL-NEXT:    scratch_load_b32 v87, off, s32 offset:252
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:264
-; DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:268
-; DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:272
-; DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:276
-; DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:280
-; DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:284
-; DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:288
-; DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:292
-; DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:296
-; DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:300
-; DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:304
-; DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:308
-; DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:312
-; DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:316
-; DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:320
-; DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:324
-; DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:328
-; DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:332
-; DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:336
-; DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:340
-; DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:344
-; DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:348
-; DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:352
-; DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:356
-; DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:360
-; DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:364
-; DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:368
-; DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:372
-; DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:376
-; DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:380
-; DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:384
-; DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:388
+; DAGISEL-NEXT:    scratch_load_b32 v96, off, s32 offset:256
+; DAGISEL-NEXT:    scratch_load_b32 v97, off, s32 offset:260
+; DAGISEL-NEXT:    scratch_load_b32 v98, off, s32 offset:264
+; DAGISEL-NEXT:    scratch_load_b32 v99, off, s32 offset:268
+; DAGISEL-NEXT:    scratch_load_b32 v100, off, s32 offset:272
+; DAGISEL-NEXT:    scratch_load_b32 v101, off, s32 offset:276
+; DAGISEL-NEXT:    scratch_load_b32 v102, off, s32 offset:280
+; DAGISEL-NEXT:    scratch_load_b32 v103, off, s32 offset:284
+; DAGISEL-NEXT:    scratch_load_b32 v112, off, s32 offset:288
+; DAGISEL-NEXT:    scratch_load_b32 v113, off, s32 offset:292
+; DAGISEL-NEXT:    scratch_load_b32 v114, off, s32 offset:296
+; DAGISEL-NEXT:    scratch_load_b32 v115, off, s32 offset:300
+; DAGISEL-NEXT:    scratch_load_b32 v116, off, s32 offset:304
+; DAGISEL-NEXT:    scratch_load_b32 v117, off, s32 offset:308
+; DAGISEL-NEXT:    scratch_load_b32 v118, off, s32 offset:312
+; DAGISEL-NEXT:    scratch_load_b32 v119, off, s32 offset:316
+; DAGISEL-NEXT:    scratch_load_b32 v128, off, s32 offset:320
+; DAGISEL-NEXT:    scratch_load_b32 v129, off, s32 offset:324
+; DAGISEL-NEXT:    scratch_load_b32 v130, off, s32 offset:328
+; DAGISEL-NEXT:    scratch_load_b32 v131, off, s32 offset:332
+; DAGISEL-NEXT:    scratch_load_b32 v132, off, s32 offset:336
+; DAGISEL-NEXT:    scratch_load_b32 v133, off, s32 offset:340
+; DAGISEL-NEXT:    scratch_load_b32 v134, off, s32 offset:344
+; DAGISEL-NEXT:    scratch_load_b32 v135, off, s32 offset:348
+; DAGISEL-NEXT:    scratch_load_b32 v144, off, s32 offset:352
+; DAGISEL-NEXT:    scratch_load_b32 v145, off, s32 offset:356
+; DAGISEL-NEXT:    scratch_load_b32 v146, off, s32 offset:360
+; DAGISEL-NEXT:    scratch_load_b32 v147, off, s32 offset:364
+; DAGISEL-NEXT:    scratch_load_b32 v148, off, s32 offset:368
+; DAGISEL-NEXT:    scratch_load_b32 v149, off, s32 offset:372
+; DAGISEL-NEXT:    scratch_load_b32 v150, off, s32 offset:376
+; DAGISEL-NEXT:    scratch_load_b32 v151, off, s32 offset:380
 ; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:392
-; DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:396
-; DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:400
-; DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:404
-; DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:408
-; DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:412
-; DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:416
-; DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:420
-; DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:424
-; DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:428
-; DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:432
-; DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:436
-; DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:440
-; DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:444
-; DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:448
-; DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:452
-; DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:456
-; DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:460
-; DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:464
-; DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:468
-; DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:472
-; DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:476
-; DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:480
-; DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:484
-; DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:488
-; DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:492
-; DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:496
-; DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:500
-; DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:504
-; DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:508
-; DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:512
-; DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:516
+; DAGISEL-NEXT:    scratch_load_b32 v160, off, s32 offset:384
+; DAGISEL-NEXT:    scratch_load_b32 v161, off, s32 offset:388
+; DAGISEL-NEXT:    scratch_load_b32 v162, off, s32 offset:392
+; DAGISEL-NEXT:    scratch_load_b32 v163, off, s32 offset:396
+; DAGISEL-NEXT:    scratch_load_b32 v164, off, s32 offset:400
+; DAGISEL-NEXT:    scratch_load_b32 v165, off, s32 offset:404
+; DAGISEL-NEXT:    scratch_load_b32 v166, off, s32 offset:408
+; DAGISEL-NEXT:    scratch_load_b32 v167, off, s32 offset:412
+; DAGISEL-NEXT:    scratch_load_b32 v176, off, s32 offset:416
+; DAGISEL-NEXT:    scratch_load_b32 v177, off, s32 offset:420
+; DAGISEL-NEXT:    scratch_load_b32 v178, off, s32 offset:424
+; DAGISEL-NEXT:    scratch_load_b32 v179, off, s32 offset:428
+; DAGISEL-NEXT:    scratch_load_b32 v180, off, s32 offset:432
+; DAGISEL-NEXT:    scratch_load_b32 v181, off, s32 offset:436
+; DAGISEL-NEXT:    scratch_load_b32 v182, off, s32 offset:440
+; DAGISEL-NEXT:    scratch_load_b32 v183, off, s32 offset:444
+; DAGISEL-NEXT:    scratch_load_b32 v192, off, s32 offset:448
+; DAGISEL-NEXT:    scratch_load_b32 v193, off, s32 offset:452
+; DAGISEL-NEXT:    scratch_load_b32 v194, off, s32 offset:456
+; DAGISEL-NEXT:    scratch_load_b32 v195, off, s32 offset:460
+; DAGISEL-NEXT:    scratch_load_b32 v196, off, s32 offset:464
+; DAGISEL-NEXT:    scratch_load_b32 v197, off, s32 offset:468
+; DAGISEL-NEXT:    scratch_load_b32 v198, off, s32 offset:472
+; DAGISEL-NEXT:    scratch_load_b32 v199, off, s32 offset:476
+; DAGISEL-NEXT:    scratch_load_b32 v208, off, s32 offset:480
+; DAGISEL-NEXT:    scratch_load_b32 v209, off, s32 offset:484
+; DAGISEL-NEXT:    scratch_load_b32 v210, off, s32 offset:488
+; DAGISEL-NEXT:    scratch_load_b32 v211, off, s32 offset:492
+; DAGISEL-NEXT:    scratch_load_b32 v212, off, s32 offset:496
+; DAGISEL-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; DAGISEL-NEXT:    scratch_load_b32 v214, off, s32 offset:504
+; DAGISEL-NEXT:    scratch_load_b32 v215, off, s32 offset:508
 ; DAGISEL-NEXT:    s_clause 0xf
-; DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:520
-; DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:524
-; DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:528
-; DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:532
-; DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:536
-; DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:540
-; DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:544
-; DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:548
-; DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:552
-; DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:556
-; DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:560
-; DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:564
-; DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:568
-; DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:572
-; DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:576
-; DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:580
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, s34
-; DAGISEL-NEXT:    s_mov_b32 s33, s35
-; DAGISEL-NEXT:    s_wait_loadcnt 0x0
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_setpc_b64 s[30:31]
+; DAGISEL-NEXT:    scratch_load_b32 v224, off, s32 offset:512
+; DAGISEL-NEXT:    scratch_load_b32 v225, off, s32 offset:516
+; DAGISEL-NEXT:    scratch_load_b32 v226, off, s32 offset:520
+; DAGISEL-NEXT:    scratch_load_b32 v227, off, s32 offset:524
+; DAGISEL-NEXT:    scratch_load_b32 v228, off, s32 offset:528
+; DAGISEL-NEXT:    scratch_load_b32 v229, off, s32 offset:532
+; DAGISEL-NEXT:    scratch_load_b32 v230, off, s32 offset:536
+; DAGISEL-NEXT:    scratch_load_b32 v231, off, s32 offset:540
+; DAGISEL-NEXT:    scratch_load_b32 v240, off, s32 offset:544
+; DAGISEL-NEXT:    scratch_load_b32 v241, off, s32 offset:548
+; DAGISEL-NEXT:    scratch_load_b32 v242, off, s32 offset:552
+; DAGISEL-NEXT:    scratch_load_b32 v243, off, s32 offset:556
+; DAGISEL-NEXT:    scratch_load_b32 v244, off, s32 offset:560
+; DAGISEL-NEXT:    scratch_load_b32 v245, off, s32 offset:564
+; DAGISEL-NEXT:    scratch_load_b32 v246, off, s32 offset:568
+; DAGISEL-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
+; DAGISEL-NEXT:    s_setpc_b64 s[36:37]
 ;
 ; GISEL-LABEL: tail_call_gfx_from_whole_wave:
 ; GISEL:       ; %bb.0:
@@ -2841,417 +2738,314 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
 ; GISEL-NEXT:    s_wait_samplecnt 0x0
 ; GISEL-NEXT:    s_wait_bvhcnt 0x0
 ; GISEL-NEXT:    s_wait_kmcnt 0x0
-; GISEL-NEXT:    s_mov_b32 s35, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_xor_saveexec_b32 s34, -1
+; GISEL-NEXT:    s_xor_saveexec_b32 s0, -1
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:8
-; GISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:12
-; GISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:16
-; GISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:20
-; GISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:24
-; GISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:28
-; GISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:32
-; GISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:36
-; GISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:40
-; GISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:44
-; GISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:48
-; GISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:52
-; GISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:56
-; GISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:60
-; GISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:64
-; GISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:68
-; GISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:72
-; GISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:76
-; GISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:80
-; GISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:84
-; GISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:88
-; GISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:92
-; GISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:96
-; GISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:100
-; GISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:104
-; GISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:108
-; GISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:112
-; GISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:116
-; GISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:120
-; GISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:124
-; GISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:128
-; GISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:132
+; GISEL-NEXT:    scratch_store_b32 off, v0, s32
+; GISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; GISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
+; GISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; GISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
+; GISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; GISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24
+; GISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28
+; GISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32
+; GISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36
+; GISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40
+; GISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44
+; GISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48
+; GISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52
+; GISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56
+; GISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60
+; GISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64
+; GISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68
+; GISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72
+; GISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76
+; GISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80
+; GISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84
+; GISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88
+; GISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92
+; GISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96
+; GISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100
+; GISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104
+; GISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108
+; GISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112
+; GISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116
+; GISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120
+; GISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:136
-; GISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:140
-; GISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:144
-; GISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:148
-; GISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:152
-; GISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:156
-; GISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:160
-; GISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:164
-; GISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:168
-; GISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:172
-; GISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:176
-; GISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:180
-; GISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:184
-; GISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:188
-; GISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:192
-; GISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:196
-; GISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:200
-; GISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:204
-; GISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:208
-; GISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:212
-; GISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:216
-; GISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:220
-; GISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:224
-; GISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:228
-; GISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:232
-; GISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:236
-; GISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:240
-; GISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:244
-; GISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:248
-; GISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:252
-; GISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:256
-; GISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:260
+; GISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128
+; GISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132
+; GISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136
+; GISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140
+; GISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144
+; GISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148
+; GISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152
+; GISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156
+; GISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160
+; GISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164
+; GISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168
+; GISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172
+; GISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176
+; GISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180
+; GISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184
+; GISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188
+; GISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192
+; GISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196
+; GISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200
+; GISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204
+; GISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208
+; GISEL-NEXT:    scratch_store_b32 off, v69, s32 offset:212
+; GISEL-NEXT:    scratch_store_b32 off, v70, s32 offset:216
+; GISEL-NEXT:    scratch_store_b32 off, v71, s32 offset:220
+; GISEL-NEXT:    scratch_store_b32 off, v80, s32 offset:224
+; GISEL-NEXT:    scratch_store_b32 off, v81, s32 offset:228
+; GISEL-NEXT:    scratch_store_b32 off, v82, s32 offset:232
+; GISEL-NEXT:    scratch_store_b32 off, v83, s32 offset:236
+; GISEL-NEXT:    scratch_store_b32 off, v84, s32 offset:240
+; GISEL-NEXT:    scratch_store_b32 off, v85, s32 offset:244
+; GISEL-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; GISEL-NEXT:    scratch_store_b32 off, v87, s32 offset:252
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:264
-; GISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:268
-; GISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:272
-; GISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:276
-; GISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:280
-; GISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:284
-; GISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:288
-; GISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:292
-; GISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:296
-; GISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:300
-; GISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:304
-; GISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:308
-; GISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:312
-; GISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:316
-; GISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:320
-; GISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:324
-; GISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:328
-; GISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:332
-; GISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:336
-; GISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:340
-; GISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:344
-; GISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:348
-; GISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:352
-; GISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:356
-; GISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:360
-; GISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:364
-; GISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:368
-; GISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:372
-; GISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:376
-; GISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:380
-; GISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:384
-; GISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:388
+; GISEL-NEXT:    scratch_store_b32 off, v96, s32 offset:256
+; GISEL-NEXT:    scratch_store_b32 off, v97, s32 offset:260
+; GISEL-NEXT:    scratch_store_b32 off, v98, s32 offset:264
+; GISEL-NEXT:    scratch_store_b32 off, v99, s32 offset:268
+; GISEL-NEXT:    scratch_store_b32 off, v100, s32 offset:272
+; GISEL-NEXT:    scratch_store_b32 off, v101, s32 offset:276
+; GISEL-NEXT:    scratch_store_b32 off, v102, s32 offset:280
+; GISEL-NEXT:    scratch_store_b32 off, v103, s32 offset:284
+; GISEL-NEXT:    scratch_store_b32 off, v112, s32 offset:288
+; GISEL-NEXT:    scratch_store_b32 off, v113, s32 offset:292
+; GISEL-NEXT:    scratch_store_b32 off, v114, s32 offset:296
+; GISEL-NEXT:    scratch_store_b32 off, v115, s32 offset:300
+; GISEL-NEXT:    scratch_store_b32 off, v116, s32 offset:304
+; GISEL-NEXT:    scratch_store_b32 off, v117, s32 offset:308
+; GISEL-NEXT:    scratch_store_b32 off, v118, s32 offset:312
+; GISEL-NEXT:    scratch_store_b32 off, v119, s32 offset:316
+; GISEL-NEXT:    scratch_store_b32 off, v128, s32 offset:320
+; GISEL-NEXT:    scratch_store_b32 off, v129, s32 offset:324
+; GISEL-NEXT:    scratch_store_b32 off, v130, s32 offset:328
+; GISEL-NEXT:    scratch_store_b32 off, v131, s32 offset:332
+; GISEL-NEXT:    scratch_store_b32 off, v132, s32 offset:336
+; GISEL-NEXT:    scratch_store_b32 off, v133, s32 offset:340
+; GISEL-NEXT:    scratch_store_b32 off, v134, s32 offset:344
+; GISEL-NEXT:    scratch_store_b32 off, v135, s32 offset:348
+; GISEL-NEXT:    scratch_store_b32 off, v144, s32 offset:352
+; GISEL-NEXT:    scratch_store_b32 off, v145, s32 offset:356
+; GISEL-NEXT:    scratch_store_b32 off, v146, s32 offset:360
+; GISEL-NEXT:    scratch_store_b32 off, v147, s32 offset:364
+; GISEL-NEXT:    scratch_store_b32 off, v148, s32 offset:368
+; GISEL-NEXT:    scratch_store_b32 off, v149, s32 offset:372
+; GISEL-NEXT:    scratch_store_b32 off, v150, s32 offset:376
+; GISEL-NEXT:    scratch_store_b32 off, v151, s32 offset:380
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:392
-; GISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:396
-; GISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:400
-; GISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:404
-; GISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:408
-; GISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:412
-; GISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:416
-; GISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:420
-; GISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:424
-; GISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:428
-; GISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:432
-; GISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:436
-; GISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:440
-; GISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:444
-; GISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:448
-; GISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:452
-; GISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:456
-; GISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:460
-; GISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:464
-; GISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:468
-; GISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:472
-; GISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:476
-; GISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:480
-; GISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:484
-; GISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:488
-; GISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:492
-; GISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:496
-; GISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:500
-; GISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:504
-; GISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:508
-; GISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:512
-; GISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:516
+; GISEL-NEXT:    scratch_store_b32 off, v160, s32 offset:384
+; GISEL-NEXT:    scratch_store_b32 off, v161, s32 offset:388
+; GISEL-NEXT:    scratch_store_b32 off, v162, s32 offset:392
+; GISEL-NEXT:    scratch_store_b32 off, v163, s32 offset:396
+; GISEL-NEXT:    scratch_store_b32 off, v164, s32 offset:400
+; GISEL-NEXT:    scratch_store_b32 off, v165, s32 offset:404
+; GISEL-NEXT:    scratch_store_b32 off, v166, s32 offset:408
+; GISEL-NEXT:    scratch_store_b32 off, v167, s32 offset:412
+; GISEL-NEXT:    scratch_store_b32 off, v176, s32 offset:416
+; GISEL-NEXT:    scratch_store_b32 off, v177, s32 offset:420
+; GISEL-NEXT:    scratch_store_b32 off, v178, s32 offset:424
+; GISEL-NEXT:    scratch_store_b32 off, v179, s32 offset:428
+; GISEL-NEXT:    scratch_store_b32 off, v180, s32 offset:432
+; GISEL-NEXT:    scratch_store_b32 off, v181, s32 offset:436
+; GISEL-NEXT:    scratch_store_b32 off, v182, s32 offset:440
+; GISEL-NEXT:    scratch_store_b32 off, v183, s32 offset:444
+; GISEL-NEXT:    scratch_store_b32 off, v192, s32 offset:448
+; GISEL-NEXT:    scratch_store_b32 off, v193, s32 offset:452
+; GISEL-NEXT:    scratch_store_b32 off, v194, s32 offset:456
+; GISEL-NEXT:    scratch_store_b32 off, v195, s32 offset:460
+; GISEL-NEXT:    scratch_store_b32 off, v196, s32 offset:464
+; GISEL-NEXT:    scratch_store_b32 off, v197, s32 offset:468
+; GISEL-NEXT:    scratch_store_b32 off, v198, s32 offset:472
+; GISEL-NEXT:    scratch_store_b32 off, v199, s32 offset:476
+; GISEL-NEXT:    scratch_store_b32 off, v208, s32 offset:480
+; GISEL-NEXT:    scratch_store_b32 off, v209, s32 offset:484
+; GISEL-NEXT:    scratch_store_b32 off, v210, s32 offset:488
+; GISEL-NEXT:    scratch_store_b32 off, v211, s32 offset:492
+; GISEL-NEXT:    scratch_store_b32 off, v212, s32 offset:496
+; GISEL-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; GISEL-NEXT:    scratch_store_b32 off, v214, s32 offset:504
+; GISEL-NEXT:    scratch_store_b32 off, v215, s32 offset:508
 ; GISEL-NEXT:    s_clause 0xf
-; GISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:520
-; GISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:524
-; GISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:528
-; GISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:532
-; GISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:536
-; GISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:540
-; GISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:544
-; GISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:548
-; GISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:552
-; GISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:556
-; GISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:560
-; GISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:564
-; GISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:568
-; GISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:572
-; GISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:576
-; GISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:580
+; GISEL-NEXT:    scratch_store_b32 off, v224, s32 offset:512
+; GISEL-NEXT:    scratch_store_b32 off, v225, s32 offset:516
+; GISEL-NEXT:    scratch_store_b32 off, v226, s32 offset:520
+; GISEL-NEXT:    scratch_store_b32 off, v227, s32 offset:524
+; GISEL-NEXT:    scratch_store_b32 off, v228, s32 offset:528
+; GISEL-NEXT:    scratch_store_b32 off, v229, s32 offset:532
+; GISEL-NEXT:    scratch_store_b32 off, v230, s32 offset:536
+; GISEL-NEXT:    scratch_store_b32 off, v231, s32 offset:540
+; GISEL-NEXT:    scratch_store_b32 off, v240, s32 offset:544
+; GISEL-NEXT:    scratch_store_b32 off, v241, s32 offset:548
+; GISEL-NEXT:    scratch_store_b32 off, v242, s32 offset:552
+; GISEL-NEXT:    scratch_store_b32 off, v243, s32 offset:556
+; GISEL-NEXT:    scratch_store_b32 off, v244, s32 offset:560
+; GISEL-NEXT:    scratch_store_b32 off, v245, s32 offset:564
+; GISEL-NEXT:    scratch_store_b32 off, v246, s32 offset:568
+; GISEL-NEXT:    scratch_store_b32 off, v247, s32 offset:572
 ; GISEL-NEXT:    s_mov_b32 exec_lo, -1
-; GISEL-NEXT:    s_clause 0x1
-; GISEL-NEXT:    scratch_store_b32 off, v40, s33
-; GISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:4
-; GISEL-NEXT:    v_writelane_b32 v40, s4, 0
-; GISEL-NEXT:    v_writelane_b32 v41, s76, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v2, v0
 ; GISEL-NEXT:    v_swap_b32 v0, v1
-; GISEL-NEXT:    v_writelane_b32 v40, s5, 1
-; GISEL-NEXT:    v_writelane_b32 v41, s77, 1
-; GISEL-NEXT:    s_mov_b32 s0, gfx_callee at abs32@lo
-; GISEL-NEXT:    s_mov_b32 s1, gfx_callee at abs32@hi
-; GISEL-NEXT:    s_addk_co_i32 s32, 0x250
-; GISEL-NEXT:    v_writelane_b32 v40, s6, 2
-; GISEL-NEXT:    v_writelane_b32 v41, s78, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s7, 3
-; GISEL-NEXT:    v_writelane_b32 v41, s79, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s8, 4
-; GISEL-NEXT:    v_writelane_b32 v41, s88, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s9, 5
-; GISEL-NEXT:    v_writelane_b32 v41, s89, 5
-; GISEL-NEXT:    s_mov_b64 s[8:9], 0
-; GISEL-NEXT:    v_writelane_b32 v40, s10, 6
-; GISEL-NEXT:    v_writelane_b32 v41, s90, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s11, 7
-; GISEL-NEXT:    v_writelane_b32 v41, s91, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s12, 8
-; GISEL-NEXT:    v_writelane_b32 v41, s92, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s13, 9
-; GISEL-NEXT:    v_writelane_b32 v41, s93, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s14, 10
-; GISEL-NEXT:    v_writelane_b32 v41, s94, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s15, 11
-; GISEL-NEXT:    v_writelane_b32 v41, s95, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s17, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s18, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s19, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s20, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s21, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s22, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s23, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s24, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s25, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s26, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s27, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s28, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s29, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s72, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s73, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s74, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s75, 31
+; GISEL-NEXT:    s_mov_b32 s36, gfx_callee at abs32@lo
+; GISEL-NEXT:    s_mov_b32 s37, gfx_callee at abs32@hi
 ; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL-NEXT:    v_readlane_b32 s95, v41, 11
-; GISEL-NEXT:    v_readlane_b32 s94, v41, 10
-; GISEL-NEXT:    v_readlane_b32 s93, v41, 9
-; GISEL-NEXT:    v_readlane_b32 s92, v41, 8
-; GISEL-NEXT:    v_readlane_b32 s91, v41, 7
-; GISEL-NEXT:    v_readlane_b32 s90, v41, 6
-; GISEL-NEXT:    v_readlane_b32 s89, v41, 5
-; GISEL-NEXT:    v_readlane_b32 s88, v41, 4
-; GISEL-NEXT:    v_readlane_b32 s79, v41, 3
-; GISEL-NEXT:    v_readlane_b32 s78, v41, 2
-; GISEL-NEXT:    v_readlane_b32 s77, v41, 1
-; GISEL-NEXT:    v_readlane_b32 s76, v41, 0
-; GISEL-NEXT:    v_readlane_b32 s75, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s74, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s73, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s72, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s29, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s28, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s27, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s26, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s25, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s24, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s23, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s22, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s21, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s20, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s19, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s18, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s17, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s16, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s15, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s14, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s13, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s12, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s11, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s10, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s9, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s8, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s7, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s6, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s5, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 0
-; GISEL-NEXT:    s_clause 0x1
-; GISEL-NEXT:    scratch_load_b32 v40, off, s33
-; GISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:4
-; GISEL-NEXT:    s_mov_b32 s32, s33
-; GISEL-NEXT:    s_xor_b32 exec_lo, s34, -1
+; GISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:8
-; GISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:12
-; GISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:16
-; GISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:20
-; GISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:24
-; GISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:28
-; GISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:32
-; GISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:36
-; GISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:40
-; GISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:44
-; GISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:48
-; GISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:52
-; GISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:56
-; GISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:60
-; GISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:64
-; GISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:68
-; GISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:72
-; GISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:76
-; GISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:80
-; GISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:84
-; GISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:88
-; GISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:92
-; GISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:96
-; GISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:100
-; GISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:104
-; GISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:108
-; GISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:112
-; GISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:116
-; GISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:120
-; GISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:124
-; GISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:128
-; GISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:132
+; GISEL-NEXT:    scratch_load_b32 v0, off, s32
+; GISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; GISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; GISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; GISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; GISEL-NEXT:    scratch_load_b32 v6, off, s32 offset:24
+; GISEL-NEXT:    scratch_load_b32 v7, off, s32 offset:28
+; GISEL-NEXT:    scratch_load_b32 v8, off, s32 offset:32
+; GISEL-NEXT:    scratch_load_b32 v9, off, s32 offset:36
+; GISEL-NEXT:    scratch_load_b32 v10, off, s32 offset:40
+; GISEL-NEXT:    scratch_load_b32 v11, off, s32 offset:44
+; GISEL-NEXT:    scratch_load_b32 v12, off, s32 offset:48
+; GISEL-NEXT:    scratch_load_b32 v13, off, s32 offset:52
+; GISEL-NEXT:    scratch_load_b32 v14, off, s32 offset:56
+; GISEL-NEXT:    scratch_load_b32 v15, off, s32 offset:60
+; GISEL-NEXT:    scratch_load_b32 v16, off, s32 offset:64
+; GISEL-NEXT:    scratch_load_b32 v17, off, s32 offset:68
+; GISEL-NEXT:    scratch_load_b32 v18, off, s32 offset:72
+; GISEL-NEXT:    scratch_load_b32 v19, off, s32 offset:76
+; GISEL-NEXT:    scratch_load_b32 v20, off, s32 offset:80
+; GISEL-NEXT:    scratch_load_b32 v21, off, s32 offset:84
+; GISEL-NEXT:    scratch_load_b32 v22, off, s32 offset:88
+; GISEL-NEXT:    scratch_load_b32 v23, off, s32 offset:92
+; GISEL-NEXT:    scratch_load_b32 v24, off, s32 offset:96
+; GISEL-NEXT:    scratch_load_b32 v25, off, s32 offset:100
+; GISEL-NEXT:    scratch_load_b32 v26, off, s32 offset:104
+; GISEL-NEXT:    scratch_load_b32 v27, off, s32 offset:108
+; GISEL-NEXT:    scratch_load_b32 v28, off, s32 offset:112
+; GISEL-NEXT:    scratch_load_b32 v29, off, s32 offset:116
+; GISEL-NEXT:    scratch_load_b32 v30, off, s32 offset:120
+; GISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:124
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:136
-; GISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:140
-; GISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:144
-; GISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:148
-; GISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:152
-; GISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:156
-; GISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:160
-; GISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:164
-; GISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:168
-; GISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:172
-; GISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:176
-; GISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:180
-; GISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:184
-; GISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:188
-; GISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:192
-; GISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:196
-; GISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:200
-; GISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:204
-; GISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:208
-; GISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:212
-; GISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:216
-; GISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:220
-; GISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:224
-; GISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:228
-; GISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:232
-; GISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:236
-; GISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:240
-; GISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:244
-; GISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:248
-; GISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:252
-; GISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:256
-; GISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:260
+; GISEL-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; GISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:132
+; GISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:136
+; GISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:140
+; GISEL-NEXT:    scratch_load_b32 v36, off, s32 offset:144
+; GISEL-NEXT:    scratch_load_b32 v37, off, s32 offset:148
+; GISEL-NEXT:    scratch_load_b32 v38, off, s32 offset:152
+; GISEL-NEXT:    scratch_load_b32 v39, off, s32 offset:156
+; GISEL-NEXT:    scratch_load_b32 v48, off, s32 offset:160
+; GISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:164
+; GISEL-NEXT:    scratch_load_b32 v50, off, s32 offset:168
+; GISEL-NEXT:    scratch_load_b32 v51, off, s32 offset:172
+; GISEL-NEXT:    scratch_load_b32 v52, off, s32 offset:176
+; GISEL-NEXT:    scratch_load_b32 v53, off, s32 offset:180
+; GISEL-NEXT:    scratch_load_b32 v54, off, s32 offset:184
+; GISEL-NEXT:    scratch_load_b32 v55, off, s32 offset:188
+; GISEL-NEXT:    scratch_load_b32 v64, off, s32 offset:192
+; GISEL-NEXT:    scratch_load_b32 v65, off, s32 offset:196
+; GISEL-NEXT:    scratch_load_b32 v66, off, s32 offset:200
+; GISEL-NEXT:    scratch_load_b32 v67, off, s32 offset:204
+; GISEL-NEXT:    scratch_load_b32 v68, off, s32 offset:208
+; GISEL-NEXT:    scratch_load_b32 v69, off, s32 offset:212
+; GISEL-NEXT:    scratch_load_b32 v70, off, s32 offset:216
+; GISEL-NEXT:    scratch_load_b32 v71, off, s32 offset:220
+; GISEL-NEXT:    scratch_load_b32 v80, off, s32 offset:224
+; GISEL-NEXT:    scratch_load_b32 v81, off, s32 offset:228
+; GISEL-NEXT:    scratch_load_b32 v82, off, s32 offset:232
+; GISEL-NEXT:    scratch_load_b32 v83, off, s32 offset:236
+; GISEL-NEXT:    scratch_load_b32 v84, off, s32 offset:240
+; GISEL-NEXT:    scratch_load_b32 v85, off, s32 offset:244
+; GISEL-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; GISEL-NEXT:    scratch_load_b32 v87, off, s32 offset:252
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:264
-; GISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:268
-; GISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:272
-; GISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:276
-; GISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:280
-; GISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:284
-; GISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:288
-; GISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:292
-; GISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:296
-; GISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:300
-; GISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:304
-; GISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:308
-; GISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:312
-; GISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:316
-; GISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:320
-; GISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:324
-; GISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:328
-; GISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:332
-; GISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:336
-; GISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:340
-; GISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:344
-; GISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:348
-; GISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:352
-; GISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:356
-; GISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:360
-; GISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:364
-; GISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:368
-; GISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:372
-; GISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:376
-; GISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:380
-; GISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:384
-; GISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:388
+; GISEL-NEXT:    scratch_load_b32 v96, off, s32 offset:256
+; GISEL-NEXT:    scratch_load_b32 v97, off, s32 offset:260
+; GISEL-NEXT:    scratch_load_b32 v98, off, s32 offset:264
+; GISEL-NEXT:    scratch_load_b32 v99, off, s32 offset:268
+; GISEL-NEXT:    scratch_load_b32 v100, off, s32 offset:272
+; GISEL-NEXT:    scratch_load_b32 v101, off, s32 offset:276
+; GISEL-NEXT:    scratch_load_b32 v102, off, s32 offset:280
+; GISEL-NEXT:    scratch_load_b32 v103, off, s32 offset:284
+; GISEL-NEXT:    scratch_load_b32 v112, off, s32 offset:288
+; GISEL-NEXT:    scratch_load_b32 v113, off, s32 offset:292
+; GISEL-NEXT:    scratch_load_b32 v114, off, s32 offset:296
+; GISEL-NEXT:    scratch_load_b32 v115, off, s32 offset:300
+; GISEL-NEXT:    scratch_load_b32 v116, off, s32 offset:304
+; GISEL-NEXT:    scratch_load_b32 v117, off, s32 offset:308
+; GISEL-NEXT:    scratch_load_b32 v118, off, s32 offset:312
+; GISEL-NEXT:    scratch_load_b32 v119, off, s32 offset:316
+; GISEL-NEXT:    scratch_load_b32 v128, off, s32 offset:320
+; GISEL-NEXT:    scratch_load_b32 v129, off, s32 offset:324
+; GISEL-NEXT:    scratch_load_b32 v130, off, s32 offset:328
+; GISEL-NEXT:    scratch_load_b32 v131, off, s32 offset:332
+; GISEL-NEXT:    scratch_load_b32 v132, off, s32 offset:336
+; GISEL-NEXT:    scratch_load_b32 v133, off, s32 offset:340
+; GISEL-NEXT:    scratch_load_b32 v134, off, s32 offset:344
+; GISEL-NEXT:    scratch_load_b32 v135, off, s32 offset:348
+; GISEL-NEXT:    scratch_load_b32 v144, off, s32 offset:352
+; GISEL-NEXT:    scratch_load_b32 v145, off, s32 offset:356
+; GISEL-NEXT:    scratch_load_b32 v146, off, s32 offset:360
+; GISEL-NEXT:    scratch_load_b32 v147, off, s32 offset:364
+; GISEL-NEXT:    scratch_load_b32 v148, off, s32 offset:368
+; GISEL-NEXT:    scratch_load_b32 v149, off, s32 offset:372
+; GISEL-NEXT:    scratch_load_b32 v150, off, s32 offset:376
+; GISEL-NEXT:    scratch_load_b32 v151, off, s32 offset:380
 ; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:392
-; GISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:396
-; GISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:400
-; GISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:404
-; GISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:408
-; GISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:412
-; GISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:416
-; GISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:420
-; GISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:424
-; GISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:428
-; GISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:432
-; GISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:436
-; GISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:440
-; GISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:444
-; GISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:448
-; GISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:452
-; GISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:456
-; GISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:460
-; GISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:464
-; GISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:468
-; GISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:472
-; GISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:476
-; GISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:480
-; GISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:484
-; GISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:488
-; GISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:492
-; GISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:496
-; GISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:500
-; GISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:504
-; GISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:508
-; GISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:512
-; GISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:516
+; GISEL-NEXT:    scratch_load_b32 v160, off, s32 offset:384
+; GISEL-NEXT:    scratch_load_b32 v161, off, s32 offset:388
+; GISEL-NEXT:    scratch_load_b32 v162, off, s32 offset:392
+; GISEL-NEXT:    scratch_load_b32 v163, off, s32 offset:396
+; GISEL-NEXT:    scratch_load_b32 v164, off, s32 offset:400
+; GISEL-NEXT:    scratch_load_b32 v165, off, s32 offset:404
+; GISEL-NEXT:    scratch_load_b32 v166, off, s32 offset:408
+; GISEL-NEXT:    scratch_load_b32 v167, off, s32 offset:412
+; GISEL-NEXT:    scratch_load_b32 v176, off, s32 offset:416
+; GISEL-NEXT:    scratch_load_b32 v177, off, s32 offset:420
+; GISEL-NEXT:    scratch_load_b32 v178, off, s32 offset:424
+; GISEL-NEXT:    scratch_load_b32 v179, off, s32 offset:428
+; GISEL-NEXT:    scratch_load_b32 v180, off, s32 offset:432
+; GISEL-NEXT:    scratch_load_b32 v181, off, s32 offset:436
+; GISEL-NEXT:    scratch_load_b32 v182, off, s32 offset:440
+; GISEL-NEXT:    scratch_load_b32 v183, off, s32 offset:444
+; GISEL-NEXT:    scratch_load_b32 v192, off, s32 offset:448
+; GISEL-NEXT:    scratch_load_b32 v193, off, s32 offset:452
+; GISEL-NEXT:    scratch_load_b32 v194, off, s32 offset:456
+; GISEL-NEXT:    scratch_load_b32 v195, off, s32 offset:460
+; GISEL-NEXT:    scratch_load_b32 v196, off, s32 offset:464
+; GISEL-NEXT:    scratch_load_b32 v197, off, s32 offset:468
+; GISEL-NEXT:    scratch_load_b32 v198, off, s32 offset:472
+; GISEL-NEXT:    scratch_load_b32 v199, off, s32 offset:476
+; GISEL-NEXT:    scratch_load_b32 v208, off, s32 offset:480
+; GISEL-NEXT:    scratch_load_b32 v209, off, s32 offset:484
+; GISEL-NEXT:    scratch_load_b32 v210, off, s32 offset:488
+; GISEL-NEXT:    scratch_load_b32 v211, off, s32 offset:492
+; GISEL-NEXT:    scratch_load_b32 v212, off, s32 offset:496
+; GISEL-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; GISEL-NEXT:    scratch_load_b32 v214, off, s32 offset:504
+; GISEL-NEXT:    scratch_load_b32 v215, off, s32 offset:508
 ; GISEL-NEXT:    s_clause 0xf
-; GISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:520
-; GISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:524
-; GISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:528
-; GISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:532
-; GISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:536
-; GISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:540
-; GISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:544
-; GISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:548
-; GISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:552
-; GISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:556
-; GISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:560
-; GISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:564
-; GISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:568
-; GISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:572
-; GISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:576
-; GISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:580
-; GISEL-NEXT:    s_mov_b32 exec_lo, s34
-; GISEL-NEXT:    s_mov_b32 s33, s35
-; GISEL-NEXT:    s_wait_loadcnt 0x0
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:    scratch_load_b32 v224, off, s32 offset:512
+; GISEL-NEXT:    scratch_load_b32 v225, off, s32 offset:516
+; GISEL-NEXT:    scratch_load_b32 v226, off, s32 offset:520
+; GISEL-NEXT:    scratch_load_b32 v227, off, s32 offset:524
+; GISEL-NEXT:    scratch_load_b32 v228, off, s32 offset:528
+; GISEL-NEXT:    scratch_load_b32 v229, off, s32 offset:532
+; GISEL-NEXT:    scratch_load_b32 v230, off, s32 offset:536
+; GISEL-NEXT:    scratch_load_b32 v231, off, s32 offset:540
+; GISEL-NEXT:    scratch_load_b32 v240, off, s32 offset:544
+; GISEL-NEXT:    scratch_load_b32 v241, off, s32 offset:548
+; GISEL-NEXT:    scratch_load_b32 v242, off, s32 offset:552
+; GISEL-NEXT:    scratch_load_b32 v243, off, s32 offset:556
+; GISEL-NEXT:    scratch_load_b32 v244, off, s32 offset:560
+; GISEL-NEXT:    scratch_load_b32 v245, off, s32 offset:564
+; GISEL-NEXT:    scratch_load_b32 v246, off, s32 offset:568
+; GISEL-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; GISEL-NEXT:    s_mov_b32 exec_lo, s0
+; GISEL-NEXT:    s_setpc_b64 s[36:37]
 ;
 ; DAGISEL64-LABEL: tail_call_gfx_from_whole_wave:
 ; DAGISEL64:       ; %bb.0:
@@ -3260,414 +3054,314 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
 ; DAGISEL64-NEXT:    s_wait_samplecnt 0x0
 ; DAGISEL64-NEXT:    s_wait_bvhcnt 0x0
 ; DAGISEL64-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL64-NEXT:    s_mov_b32 s36, s33
-; DAGISEL64-NEXT:    s_mov_b32 s33, s32
-; DAGISEL64-NEXT:    s_xor_saveexec_b64 s[34:35], -1
+; DAGISEL64-NEXT:    s_xor_saveexec_b64 s[0:1], -1
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; DAGISEL64-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; DAGISEL64-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; DAGISEL64-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; DAGISEL64-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; DAGISEL64-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; DAGISEL64-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; DAGISEL64-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; DAGISEL64-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; DAGISEL64-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; DAGISEL64-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; DAGISEL64-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; DAGISEL64-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; DAGISEL64-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; DAGISEL64-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; DAGISEL64-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; DAGISEL64-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; DAGISEL64-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; DAGISEL64-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; DAGISEL64-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; DAGISEL64-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; DAGISEL64-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; DAGISEL64-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; DAGISEL64-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; DAGISEL64-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; DAGISEL64-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; DAGISEL64-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; DAGISEL64-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; DAGISEL64-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; DAGISEL64-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; DAGISEL64-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; DAGISEL64-NEXT:    scratch_store_b32 off, v31, s33 offset:128
+; DAGISEL64-NEXT:    scratch_store_b32 off, v0, s32
+; DAGISEL64-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; DAGISEL64-NEXT:    scratch_store_b32 off, v2, s32 offset:8
+; DAGISEL64-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; DAGISEL64-NEXT:    scratch_store_b32 off, v4, s32 offset:16
+; DAGISEL64-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; DAGISEL64-NEXT:    scratch_store_b32 off, v6, s32 offset:24
+; DAGISEL64-NEXT:    scratch_store_b32 off, v7, s32 offset:28
+; DAGISEL64-NEXT:    scratch_store_b32 off, v8, s32 offset:32
+; DAGISEL64-NEXT:    scratch_store_b32 off, v9, s32 offset:36
+; DAGISEL64-NEXT:    scratch_store_b32 off, v10, s32 offset:40
+; DAGISEL64-NEXT:    scratch_store_b32 off, v11, s32 offset:44
+; DAGISEL64-NEXT:    scratch_store_b32 off, v12, s32 offset:48
+; DAGISEL64-NEXT:    scratch_store_b32 off, v13, s32 offset:52
+; DAGISEL64-NEXT:    scratch_store_b32 off, v14, s32 offset:56
+; DAGISEL64-NEXT:    scratch_store_b32 off, v15, s32 offset:60
+; DAGISEL64-NEXT:    scratch_store_b32 off, v16, s32 offset:64
+; DAGISEL64-NEXT:    scratch_store_b32 off, v17, s32 offset:68
+; DAGISEL64-NEXT:    scratch_store_b32 off, v18, s32 offset:72
+; DAGISEL64-NEXT:    scratch_store_b32 off, v19, s32 offset:76
+; DAGISEL64-NEXT:    scratch_store_b32 off, v20, s32 offset:80
+; DAGISEL64-NEXT:    scratch_store_b32 off, v21, s32 offset:84
+; DAGISEL64-NEXT:    scratch_store_b32 off, v22, s32 offset:88
+; DAGISEL64-NEXT:    scratch_store_b32 off, v23, s32 offset:92
+; DAGISEL64-NEXT:    scratch_store_b32 off, v24, s32 offset:96
+; DAGISEL64-NEXT:    scratch_store_b32 off, v25, s32 offset:100
+; DAGISEL64-NEXT:    scratch_store_b32 off, v26, s32 offset:104
+; DAGISEL64-NEXT:    scratch_store_b32 off, v27, s32 offset:108
+; DAGISEL64-NEXT:    scratch_store_b32 off, v28, s32 offset:112
+; DAGISEL64-NEXT:    scratch_store_b32 off, v29, s32 offset:116
+; DAGISEL64-NEXT:    scratch_store_b32 off, v30, s32 offset:120
+; DAGISEL64-NEXT:    scratch_store_b32 off, v31, s32 offset:124
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; DAGISEL64-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; DAGISEL64-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; DAGISEL64-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; DAGISEL64-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; DAGISEL64-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; DAGISEL64-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; DAGISEL64-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; DAGISEL64-NEXT:    scratch_store_b32 off, v48, s33 offset:164
-; DAGISEL64-NEXT:    scratch_store_b32 off, v49, s33 offset:168
-; DAGISEL64-NEXT:    scratch_store_b32 off, v50, s33 offset:172
-; DAGISEL64-NEXT:    scratch_store_b32 off, v51, s33 offset:176
-; DAGISEL64-NEXT:    scratch_store_b32 off, v52, s33 offset:180
-; DAGISEL64-NEXT:    scratch_store_b32 off, v53, s33 offset:184
-; DAGISEL64-NEXT:    scratch_store_b32 off, v54, s33 offset:188
-; DAGISEL64-NEXT:    scratch_store_b32 off, v55, s33 offset:192
-; DAGISEL64-NEXT:    scratch_store_b32 off, v64, s33 offset:196
-; DAGISEL64-NEXT:    scratch_store_b32 off, v65, s33 offset:200
-; DAGISEL64-NEXT:    scratch_store_b32 off, v66, s33 offset:204
-; DAGISEL64-NEXT:    scratch_store_b32 off, v67, s33 offset:208
-; DAGISEL64-NEXT:    scratch_store_b32 off, v68, s33 offset:212
-; DAGISEL64-NEXT:    scratch_store_b32 off, v69, s33 offset:216
-; DAGISEL64-NEXT:    scratch_store_b32 off, v70, s33 offset:220
-; DAGISEL64-NEXT:    scratch_store_b32 off, v71, s33 offset:224
-; DAGISEL64-NEXT:    scratch_store_b32 off, v80, s33 offset:228
-; DAGISEL64-NEXT:    scratch_store_b32 off, v81, s33 offset:232
-; DAGISEL64-NEXT:    scratch_store_b32 off, v82, s33 offset:236
-; DAGISEL64-NEXT:    scratch_store_b32 off, v83, s33 offset:240
-; DAGISEL64-NEXT:    scratch_store_b32 off, v84, s33 offset:244
-; DAGISEL64-NEXT:    scratch_store_b32 off, v85, s33 offset:248
-; DAGISEL64-NEXT:    scratch_store_b32 off, v86, s33 offset:252
-; DAGISEL64-NEXT:    scratch_store_b32 off, v87, s33 offset:256
+; DAGISEL64-NEXT:    scratch_store_b32 off, v32, s32 offset:128
+; DAGISEL64-NEXT:    scratch_store_b32 off, v33, s32 offset:132
+; DAGISEL64-NEXT:    scratch_store_b32 off, v34, s32 offset:136
+; DAGISEL64-NEXT:    scratch_store_b32 off, v35, s32 offset:140
+; DAGISEL64-NEXT:    scratch_store_b32 off, v36, s32 offset:144
+; DAGISEL64-NEXT:    scratch_store_b32 off, v37, s32 offset:148
+; DAGISEL64-NEXT:    scratch_store_b32 off, v38, s32 offset:152
+; DAGISEL64-NEXT:    scratch_store_b32 off, v39, s32 offset:156
+; DAGISEL64-NEXT:    scratch_store_b32 off, v48, s32 offset:160
+; DAGISEL64-NEXT:    scratch_store_b32 off, v49, s32 offset:164
+; DAGISEL64-NEXT:    scratch_store_b32 off, v50, s32 offset:168
+; DAGISEL64-NEXT:    scratch_store_b32 off, v51, s32 offset:172
+; DAGISEL64-NEXT:    scratch_store_b32 off, v52, s32 offset:176
+; DAGISEL64-NEXT:    scratch_store_b32 off, v53, s32 offset:180
+; DAGISEL64-NEXT:    scratch_store_b32 off, v54, s32 offset:184
+; DAGISEL64-NEXT:    scratch_store_b32 off, v55, s32 offset:188
+; DAGISEL64-NEXT:    scratch_store_b32 off, v64, s32 offset:192
+; DAGISEL64-NEXT:    scratch_store_b32 off, v65, s32 offset:196
+; DAGISEL64-NEXT:    scratch_store_b32 off, v66, s32 offset:200
+; DAGISEL64-NEXT:    scratch_store_b32 off, v67, s32 offset:204
+; DAGISEL64-NEXT:    scratch_store_b32 off, v68, s32 offset:208
+; DAGISEL64-NEXT:    scratch_store_b32 off, v69, s32 offset:212
+; DAGISEL64-NEXT:    scratch_store_b32 off, v70, s32 offset:216
+; DAGISEL64-NEXT:    scratch_store_b32 off, v71, s32 offset:220
+; DAGISEL64-NEXT:    scratch_store_b32 off, v80, s32 offset:224
+; DAGISEL64-NEXT:    scratch_store_b32 off, v81, s32 offset:228
+; DAGISEL64-NEXT:    scratch_store_b32 off, v82, s32 offset:232
+; DAGISEL64-NEXT:    scratch_store_b32 off, v83, s32 offset:236
+; DAGISEL64-NEXT:    scratch_store_b32 off, v84, s32 offset:240
+; DAGISEL64-NEXT:    scratch_store_b32 off, v85, s32 offset:244
+; DAGISEL64-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; DAGISEL64-NEXT:    scratch_store_b32 off, v87, s32 offset:252
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v96, s33 offset:260
-; DAGISEL64-NEXT:    scratch_store_b32 off, v97, s33 offset:264
-; DAGISEL64-NEXT:    scratch_store_b32 off, v98, s33 offset:268
-; DAGISEL64-NEXT:    scratch_store_b32 off, v99, s33 offset:272
-; DAGISEL64-NEXT:    scratch_store_b32 off, v100, s33 offset:276
-; DAGISEL64-NEXT:    scratch_store_b32 off, v101, s33 offset:280
-; DAGISEL64-NEXT:    scratch_store_b32 off, v102, s33 offset:284
-; DAGISEL64-NEXT:    scratch_store_b32 off, v103, s33 offset:288
-; DAGISEL64-NEXT:    scratch_store_b32 off, v112, s33 offset:292
-; DAGISEL64-NEXT:    scratch_store_b32 off, v113, s33 offset:296
-; DAGISEL64-NEXT:    scratch_store_b32 off, v114, s33 offset:300
-; DAGISEL64-NEXT:    scratch_store_b32 off, v115, s33 offset:304
-; DAGISEL64-NEXT:    scratch_store_b32 off, v116, s33 offset:308
-; DAGISEL64-NEXT:    scratch_store_b32 off, v117, s33 offset:312
-; DAGISEL64-NEXT:    scratch_store_b32 off, v118, s33 offset:316
-; DAGISEL64-NEXT:    scratch_store_b32 off, v119, s33 offset:320
-; DAGISEL64-NEXT:    scratch_store_b32 off, v128, s33 offset:324
-; DAGISEL64-NEXT:    scratch_store_b32 off, v129, s33 offset:328
-; DAGISEL64-NEXT:    scratch_store_b32 off, v130, s33 offset:332
-; DAGISEL64-NEXT:    scratch_store_b32 off, v131, s33 offset:336
-; DAGISEL64-NEXT:    scratch_store_b32 off, v132, s33 offset:340
-; DAGISEL64-NEXT:    scratch_store_b32 off, v133, s33 offset:344
-; DAGISEL64-NEXT:    scratch_store_b32 off, v134, s33 offset:348
-; DAGISEL64-NEXT:    scratch_store_b32 off, v135, s33 offset:352
-; DAGISEL64-NEXT:    scratch_store_b32 off, v144, s33 offset:356
-; DAGISEL64-NEXT:    scratch_store_b32 off, v145, s33 offset:360
-; DAGISEL64-NEXT:    scratch_store_b32 off, v146, s33 offset:364
-; DAGISEL64-NEXT:    scratch_store_b32 off, v147, s33 offset:368
-; DAGISEL64-NEXT:    scratch_store_b32 off, v148, s33 offset:372
-; DAGISEL64-NEXT:    scratch_store_b32 off, v149, s33 offset:376
-; DAGISEL64-NEXT:    scratch_store_b32 off, v150, s33 offset:380
-; DAGISEL64-NEXT:    scratch_store_b32 off, v151, s33 offset:384
+; DAGISEL64-NEXT:    scratch_store_b32 off, v96, s32 offset:256
+; DAGISEL64-NEXT:    scratch_store_b32 off, v97, s32 offset:260
+; DAGISEL64-NEXT:    scratch_store_b32 off, v98, s32 offset:264
+; DAGISEL64-NEXT:    scratch_store_b32 off, v99, s32 offset:268
+; DAGISEL64-NEXT:    scratch_store_b32 off, v100, s32 offset:272
+; DAGISEL64-NEXT:    scratch_store_b32 off, v101, s32 offset:276
+; DAGISEL64-NEXT:    scratch_store_b32 off, v102, s32 offset:280
+; DAGISEL64-NEXT:    scratch_store_b32 off, v103, s32 offset:284
+; DAGISEL64-NEXT:    scratch_store_b32 off, v112, s32 offset:288
+; DAGISEL64-NEXT:    scratch_store_b32 off, v113, s32 offset:292
+; DAGISEL64-NEXT:    scratch_store_b32 off, v114, s32 offset:296
+; DAGISEL64-NEXT:    scratch_store_b32 off, v115, s32 offset:300
+; DAGISEL64-NEXT:    scratch_store_b32 off, v116, s32 offset:304
+; DAGISEL64-NEXT:    scratch_store_b32 off, v117, s32 offset:308
+; DAGISEL64-NEXT:    scratch_store_b32 off, v118, s32 offset:312
+; DAGISEL64-NEXT:    scratch_store_b32 off, v119, s32 offset:316
+; DAGISEL64-NEXT:    scratch_store_b32 off, v128, s32 offset:320
+; DAGISEL64-NEXT:    scratch_store_b32 off, v129, s32 offset:324
+; DAGISEL64-NEXT:    scratch_store_b32 off, v130, s32 offset:328
+; DAGISEL64-NEXT:    scratch_store_b32 off, v131, s32 offset:332
+; DAGISEL64-NEXT:    scratch_store_b32 off, v132, s32 offset:336
+; DAGISEL64-NEXT:    scratch_store_b32 off, v133, s32 offset:340
+; DAGISEL64-NEXT:    scratch_store_b32 off, v134, s32 offset:344
+; DAGISEL64-NEXT:    scratch_store_b32 off, v135, s32 offset:348
+; DAGISEL64-NEXT:    scratch_store_b32 off, v144, s32 offset:352
+; DAGISEL64-NEXT:    scratch_store_b32 off, v145, s32 offset:356
+; DAGISEL64-NEXT:    scratch_store_b32 off, v146, s32 offset:360
+; DAGISEL64-NEXT:    scratch_store_b32 off, v147, s32 offset:364
+; DAGISEL64-NEXT:    scratch_store_b32 off, v148, s32 offset:368
+; DAGISEL64-NEXT:    scratch_store_b32 off, v149, s32 offset:372
+; DAGISEL64-NEXT:    scratch_store_b32 off, v150, s32 offset:376
+; DAGISEL64-NEXT:    scratch_store_b32 off, v151, s32 offset:380
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v160, s33 offset:388
-; DAGISEL64-NEXT:    scratch_store_b32 off, v161, s33 offset:392
-; DAGISEL64-NEXT:    scratch_store_b32 off, v162, s33 offset:396
-; DAGISEL64-NEXT:    scratch_store_b32 off, v163, s33 offset:400
-; DAGISEL64-NEXT:    scratch_store_b32 off, v164, s33 offset:404
-; DAGISEL64-NEXT:    scratch_store_b32 off, v165, s33 offset:408
-; DAGISEL64-NEXT:    scratch_store_b32 off, v166, s33 offset:412
-; DAGISEL64-NEXT:    scratch_store_b32 off, v167, s33 offset:416
-; DAGISEL64-NEXT:    scratch_store_b32 off, v176, s33 offset:420
-; DAGISEL64-NEXT:    scratch_store_b32 off, v177, s33 offset:424
-; DAGISEL64-NEXT:    scratch_store_b32 off, v178, s33 offset:428
-; DAGISEL64-NEXT:    scratch_store_b32 off, v179, s33 offset:432
-; DAGISEL64-NEXT:    scratch_store_b32 off, v180, s33 offset:436
-; DAGISEL64-NEXT:    scratch_store_b32 off, v181, s33 offset:440
-; DAGISEL64-NEXT:    scratch_store_b32 off, v182, s33 offset:444
-; DAGISEL64-NEXT:    scratch_store_b32 off, v183, s33 offset:448
-; DAGISEL64-NEXT:    scratch_store_b32 off, v192, s33 offset:452
-; DAGISEL64-NEXT:    scratch_store_b32 off, v193, s33 offset:456
-; DAGISEL64-NEXT:    scratch_store_b32 off, v194, s33 offset:460
-; DAGISEL64-NEXT:    scratch_store_b32 off, v195, s33 offset:464
-; DAGISEL64-NEXT:    scratch_store_b32 off, v196, s33 offset:468
-; DAGISEL64-NEXT:    scratch_store_b32 off, v197, s33 offset:472
-; DAGISEL64-NEXT:    scratch_store_b32 off, v198, s33 offset:476
-; DAGISEL64-NEXT:    scratch_store_b32 off, v199, s33 offset:480
-; DAGISEL64-NEXT:    scratch_store_b32 off, v208, s33 offset:484
-; DAGISEL64-NEXT:    scratch_store_b32 off, v209, s33 offset:488
-; DAGISEL64-NEXT:    scratch_store_b32 off, v210, s33 offset:492
-; DAGISEL64-NEXT:    scratch_store_b32 off, v211, s33 offset:496
-; DAGISEL64-NEXT:    scratch_store_b32 off, v212, s33 offset:500
-; DAGISEL64-NEXT:    scratch_store_b32 off, v213, s33 offset:504
-; DAGISEL64-NEXT:    scratch_store_b32 off, v214, s33 offset:508
-; DAGISEL64-NEXT:    scratch_store_b32 off, v215, s33 offset:512
+; DAGISEL64-NEXT:    scratch_store_b32 off, v160, s32 offset:384
+; DAGISEL64-NEXT:    scratch_store_b32 off, v161, s32 offset:388
+; DAGISEL64-NEXT:    scratch_store_b32 off, v162, s32 offset:392
+; DAGISEL64-NEXT:    scratch_store_b32 off, v163, s32 offset:396
+; DAGISEL64-NEXT:    scratch_store_b32 off, v164, s32 offset:400
+; DAGISEL64-NEXT:    scratch_store_b32 off, v165, s32 offset:404
+; DAGISEL64-NEXT:    scratch_store_b32 off, v166, s32 offset:408
+; DAGISEL64-NEXT:    scratch_store_b32 off, v167, s32 offset:412
+; DAGISEL64-NEXT:    scratch_store_b32 off, v176, s32 offset:416
+; DAGISEL64-NEXT:    scratch_store_b32 off, v177, s32 offset:420
+; DAGISEL64-NEXT:    scratch_store_b32 off, v178, s32 offset:424
+; DAGISEL64-NEXT:    scratch_store_b32 off, v179, s32 offset:428
+; DAGISEL64-NEXT:    scratch_store_b32 off, v180, s32 offset:432
+; DAGISEL64-NEXT:    scratch_store_b32 off, v181, s32 offset:436
+; DAGISEL64-NEXT:    scratch_store_b32 off, v182, s32 offset:440
+; DAGISEL64-NEXT:    scratch_store_b32 off, v183, s32 offset:444
+; DAGISEL64-NEXT:    scratch_store_b32 off, v192, s32 offset:448
+; DAGISEL64-NEXT:    scratch_store_b32 off, v193, s32 offset:452
+; DAGISEL64-NEXT:    scratch_store_b32 off, v194, s32 offset:456
+; DAGISEL64-NEXT:    scratch_store_b32 off, v195, s32 offset:460
+; DAGISEL64-NEXT:    scratch_store_b32 off, v196, s32 offset:464
+; DAGISEL64-NEXT:    scratch_store_b32 off, v197, s32 offset:468
+; DAGISEL64-NEXT:    scratch_store_b32 off, v198, s32 offset:472
+; DAGISEL64-NEXT:    scratch_store_b32 off, v199, s32 offset:476
+; DAGISEL64-NEXT:    scratch_store_b32 off, v208, s32 offset:480
+; DAGISEL64-NEXT:    scratch_store_b32 off, v209, s32 offset:484
+; DAGISEL64-NEXT:    scratch_store_b32 off, v210, s32 offset:488
+; DAGISEL64-NEXT:    scratch_store_b32 off, v211, s32 offset:492
+; DAGISEL64-NEXT:    scratch_store_b32 off, v212, s32 offset:496
+; DAGISEL64-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; DAGISEL64-NEXT:    scratch_store_b32 off, v214, s32 offset:504
+; DAGISEL64-NEXT:    scratch_store_b32 off, v215, s32 offset:508
 ; DAGISEL64-NEXT:    s_clause 0xf
-; DAGISEL64-NEXT:    scratch_store_b32 off, v224, s33 offset:516
-; DAGISEL64-NEXT:    scratch_store_b32 off, v225, s33 offset:520
-; DAGISEL64-NEXT:    scratch_store_b32 off, v226, s33 offset:524
-; DAGISEL64-NEXT:    scratch_store_b32 off, v227, s33 offset:528
-; DAGISEL64-NEXT:    scratch_store_b32 off, v228, s33 offset:532
-; DAGISEL64-NEXT:    scratch_store_b32 off, v229, s33 offset:536
-; DAGISEL64-NEXT:    scratch_store_b32 off, v230, s33 offset:540
-; DAGISEL64-NEXT:    scratch_store_b32 off, v231, s33 offset:544
-; DAGISEL64-NEXT:    scratch_store_b32 off, v240, s33 offset:548
-; DAGISEL64-NEXT:    scratch_store_b32 off, v241, s33 offset:552
-; DAGISEL64-NEXT:    scratch_store_b32 off, v242, s33 offset:556
-; DAGISEL64-NEXT:    scratch_store_b32 off, v243, s33 offset:560
-; DAGISEL64-NEXT:    scratch_store_b32 off, v244, s33 offset:564
-; DAGISEL64-NEXT:    scratch_store_b32 off, v245, s33 offset:568
-; DAGISEL64-NEXT:    scratch_store_b32 off, v246, s33 offset:572
-; DAGISEL64-NEXT:    scratch_store_b32 off, v247, s33 offset:576
+; DAGISEL64-NEXT:    scratch_store_b32 off, v224, s32 offset:512
+; DAGISEL64-NEXT:    scratch_store_b32 off, v225, s32 offset:516
+; DAGISEL64-NEXT:    scratch_store_b32 off, v226, s32 offset:520
+; DAGISEL64-NEXT:    scratch_store_b32 off, v227, s32 offset:524
+; DAGISEL64-NEXT:    scratch_store_b32 off, v228, s32 offset:528
+; DAGISEL64-NEXT:    scratch_store_b32 off, v229, s32 offset:532
+; DAGISEL64-NEXT:    scratch_store_b32 off, v230, s32 offset:536
+; DAGISEL64-NEXT:    scratch_store_b32 off, v231, s32 offset:540
+; DAGISEL64-NEXT:    scratch_store_b32 off, v240, s32 offset:544
+; DAGISEL64-NEXT:    scratch_store_b32 off, v241, s32 offset:548
+; DAGISEL64-NEXT:    scratch_store_b32 off, v242, s32 offset:552
+; DAGISEL64-NEXT:    scratch_store_b32 off, v243, s32 offset:556
+; DAGISEL64-NEXT:    scratch_store_b32 off, v244, s32 offset:560
+; DAGISEL64-NEXT:    scratch_store_b32 off, v245, s32 offset:564
+; DAGISEL64-NEXT:    scratch_store_b32 off, v246, s32 offset:568
+; DAGISEL64-NEXT:    scratch_store_b32 off, v247, s32 offset:572
 ; DAGISEL64-NEXT:    s_mov_b64 exec, -1
-; DAGISEL64-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s4, 0
 ; DAGISEL64-NEXT:    v_mov_b32_e32 v2, v0
+; DAGISEL64-NEXT:    s_mov_b32 s37, gfx_callee@abs32@hi
+; DAGISEL64-NEXT:    s_mov_b32 s36, gfx_callee@abs32@lo
 ; DAGISEL64-NEXT:    v_swap_b32 v0, v1
-; DAGISEL64-NEXT:    s_mov_b32 s1, gfx_callee@abs32@hi
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s5, 1
-; DAGISEL64-NEXT:    s_mov_b32 s0, gfx_callee@abs32@lo
-; DAGISEL64-NEXT:    s_addk_co_i32 s32, 0x250
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s6, 2
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s7, 3
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s8, 4
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s9, 5
-; DAGISEL64-NEXT:    s_mov_b64 s[8:9], 0
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s10, 6
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s11, 7
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s12, 8
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s13, 9
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s14, 10
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s15, 11
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s16, 12
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s17, 13
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s18, 14
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s19, 15
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s20, 16
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s21, 17
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s22, 18
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s23, 19
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s24, 20
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s25, 21
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s26, 22
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s27, 23
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s28, 24
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s29, 25
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s30, 26
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s31, 27
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s72, 28
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s73, 29
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s74, 30
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s75, 31
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s76, 32
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s77, 33
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s78, 34
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s79, 35
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s88, 36
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s89, 37
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s90, 38
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s91, 39
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s92, 40
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s93, 41
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s94, 42
-; DAGISEL64-NEXT:    v_writelane_b32 v40, s95, 43
 ; DAGISEL64-NEXT:    s_wait_alu 0xfffe
-; DAGISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; DAGISEL64-NEXT:    v_readlane_b32 s95, v40, 43
-; DAGISEL64-NEXT:    v_readlane_b32 s94, v40, 42
-; DAGISEL64-NEXT:    v_readlane_b32 s93, v40, 41
-; DAGISEL64-NEXT:    v_readlane_b32 s92, v40, 40
-; DAGISEL64-NEXT:    v_readlane_b32 s91, v40, 39
-; DAGISEL64-NEXT:    v_readlane_b32 s90, v40, 38
-; DAGISEL64-NEXT:    v_readlane_b32 s89, v40, 37
-; DAGISEL64-NEXT:    v_readlane_b32 s88, v40, 36
-; DAGISEL64-NEXT:    v_readlane_b32 s79, v40, 35
-; DAGISEL64-NEXT:    v_readlane_b32 s78, v40, 34
-; DAGISEL64-NEXT:    v_readlane_b32 s77, v40, 33
-; DAGISEL64-NEXT:    v_readlane_b32 s76, v40, 32
-; DAGISEL64-NEXT:    v_readlane_b32 s75, v40, 31
-; DAGISEL64-NEXT:    v_readlane_b32 s74, v40, 30
-; DAGISEL64-NEXT:    v_readlane_b32 s73, v40, 29
-; DAGISEL64-NEXT:    v_readlane_b32 s72, v40, 28
-; DAGISEL64-NEXT:    v_readlane_b32 s31, v40, 27
-; DAGISEL64-NEXT:    v_readlane_b32 s30, v40, 26
-; DAGISEL64-NEXT:    v_readlane_b32 s29, v40, 25
-; DAGISEL64-NEXT:    v_readlane_b32 s28, v40, 24
-; DAGISEL64-NEXT:    v_readlane_b32 s27, v40, 23
-; DAGISEL64-NEXT:    v_readlane_b32 s26, v40, 22
-; DAGISEL64-NEXT:    v_readlane_b32 s25, v40, 21
-; DAGISEL64-NEXT:    v_readlane_b32 s24, v40, 20
-; DAGISEL64-NEXT:    v_readlane_b32 s23, v40, 19
-; DAGISEL64-NEXT:    v_readlane_b32 s22, v40, 18
-; DAGISEL64-NEXT:    v_readlane_b32 s21, v40, 17
-; DAGISEL64-NEXT:    v_readlane_b32 s20, v40, 16
-; DAGISEL64-NEXT:    v_readlane_b32 s19, v40, 15
-; DAGISEL64-NEXT:    v_readlane_b32 s18, v40, 14
-; DAGISEL64-NEXT:    v_readlane_b32 s17, v40, 13
-; DAGISEL64-NEXT:    v_readlane_b32 s16, v40, 12
-; DAGISEL64-NEXT:    v_readlane_b32 s15, v40, 11
-; DAGISEL64-NEXT:    v_readlane_b32 s14, v40, 10
-; DAGISEL64-NEXT:    v_readlane_b32 s13, v40, 9
-; DAGISEL64-NEXT:    v_readlane_b32 s12, v40, 8
-; DAGISEL64-NEXT:    v_readlane_b32 s11, v40, 7
-; DAGISEL64-NEXT:    v_readlane_b32 s10, v40, 6
-; DAGISEL64-NEXT:    v_readlane_b32 s9, v40, 5
-; DAGISEL64-NEXT:    v_readlane_b32 s8, v40, 4
-; DAGISEL64-NEXT:    v_readlane_b32 s7, v40, 3
-; DAGISEL64-NEXT:    v_readlane_b32 s6, v40, 2
-; DAGISEL64-NEXT:    v_readlane_b32 s5, v40, 1
-; DAGISEL64-NEXT:    v_readlane_b32 s4, v40, 0
-; DAGISEL64-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; DAGISEL64-NEXT:    s_mov_b32 s32, s33
-; DAGISEL64-NEXT:    s_xor_b64 exec, s[34:35], -1
+; DAGISEL64-NEXT:    s_xor_b64 exec, s[0:1], -1
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; DAGISEL64-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; DAGISEL64-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; DAGISEL64-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; DAGISEL64-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; DAGISEL64-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; DAGISEL64-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; DAGISEL64-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; DAGISEL64-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; DAGISEL64-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; DAGISEL64-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; DAGISEL64-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; DAGISEL64-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; DAGISEL64-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; DAGISEL64-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; DAGISEL64-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; DAGISEL64-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; DAGISEL64-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; DAGISEL64-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; DAGISEL64-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; DAGISEL64-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; DAGISEL64-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; DAGISEL64-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; DAGISEL64-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; DAGISEL64-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; DAGISEL64-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; DAGISEL64-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; DAGISEL64-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; DAGISEL64-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; DAGISEL64-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; DAGISEL64-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; DAGISEL64-NEXT:    scratch_load_b32 v31, off, s33 offset:128
+; DAGISEL64-NEXT:    scratch_load_b32 v0, off, s32
+; DAGISEL64-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; DAGISEL64-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; DAGISEL64-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; DAGISEL64-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; DAGISEL64-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; DAGISEL64-NEXT:    scratch_load_b32 v6, off, s32 offset:24
+; DAGISEL64-NEXT:    scratch_load_b32 v7, off, s32 offset:28
+; DAGISEL64-NEXT:    scratch_load_b32 v8, off, s32 offset:32
+; DAGISEL64-NEXT:    scratch_load_b32 v9, off, s32 offset:36
+; DAGISEL64-NEXT:    scratch_load_b32 v10, off, s32 offset:40
+; DAGISEL64-NEXT:    scratch_load_b32 v11, off, s32 offset:44
+; DAGISEL64-NEXT:    scratch_load_b32 v12, off, s32 offset:48
+; DAGISEL64-NEXT:    scratch_load_b32 v13, off, s32 offset:52
+; DAGISEL64-NEXT:    scratch_load_b32 v14, off, s32 offset:56
+; DAGISEL64-NEXT:    scratch_load_b32 v15, off, s32 offset:60
+; DAGISEL64-NEXT:    scratch_load_b32 v16, off, s32 offset:64
+; DAGISEL64-NEXT:    scratch_load_b32 v17, off, s32 offset:68
+; DAGISEL64-NEXT:    scratch_load_b32 v18, off, s32 offset:72
+; DAGISEL64-NEXT:    scratch_load_b32 v19, off, s32 offset:76
+; DAGISEL64-NEXT:    scratch_load_b32 v20, off, s32 offset:80
+; DAGISEL64-NEXT:    scratch_load_b32 v21, off, s32 offset:84
+; DAGISEL64-NEXT:    scratch_load_b32 v22, off, s32 offset:88
+; DAGISEL64-NEXT:    scratch_load_b32 v23, off, s32 offset:92
+; DAGISEL64-NEXT:    scratch_load_b32 v24, off, s32 offset:96
+; DAGISEL64-NEXT:    scratch_load_b32 v25, off, s32 offset:100
+; DAGISEL64-NEXT:    scratch_load_b32 v26, off, s32 offset:104
+; DAGISEL64-NEXT:    scratch_load_b32 v27, off, s32 offset:108
+; DAGISEL64-NEXT:    scratch_load_b32 v28, off, s32 offset:112
+; DAGISEL64-NEXT:    scratch_load_b32 v29, off, s32 offset:116
+; DAGISEL64-NEXT:    scratch_load_b32 v30, off, s32 offset:120
+; DAGISEL64-NEXT:    scratch_load_b32 v31, off, s32 offset:124
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; DAGISEL64-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; DAGISEL64-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; DAGISEL64-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; DAGISEL64-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; DAGISEL64-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; DAGISEL64-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; DAGISEL64-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; DAGISEL64-NEXT:    scratch_load_b32 v48, off, s33 offset:164
-; DAGISEL64-NEXT:    scratch_load_b32 v49, off, s33 offset:168
-; DAGISEL64-NEXT:    scratch_load_b32 v50, off, s33 offset:172
-; DAGISEL64-NEXT:    scratch_load_b32 v51, off, s33 offset:176
-; DAGISEL64-NEXT:    scratch_load_b32 v52, off, s33 offset:180
-; DAGISEL64-NEXT:    scratch_load_b32 v53, off, s33 offset:184
-; DAGISEL64-NEXT:    scratch_load_b32 v54, off, s33 offset:188
-; DAGISEL64-NEXT:    scratch_load_b32 v55, off, s33 offset:192
-; DAGISEL64-NEXT:    scratch_load_b32 v64, off, s33 offset:196
-; DAGISEL64-NEXT:    scratch_load_b32 v65, off, s33 offset:200
-; DAGISEL64-NEXT:    scratch_load_b32 v66, off, s33 offset:204
-; DAGISEL64-NEXT:    scratch_load_b32 v67, off, s33 offset:208
-; DAGISEL64-NEXT:    scratch_load_b32 v68, off, s33 offset:212
-; DAGISEL64-NEXT:    scratch_load_b32 v69, off, s33 offset:216
-; DAGISEL64-NEXT:    scratch_load_b32 v70, off, s33 offset:220
-; DAGISEL64-NEXT:    scratch_load_b32 v71, off, s33 offset:224
-; DAGISEL64-NEXT:    scratch_load_b32 v80, off, s33 offset:228
-; DAGISEL64-NEXT:    scratch_load_b32 v81, off, s33 offset:232
-; DAGISEL64-NEXT:    scratch_load_b32 v82, off, s33 offset:236
-; DAGISEL64-NEXT:    scratch_load_b32 v83, off, s33 offset:240
-; DAGISEL64-NEXT:    scratch_load_b32 v84, off, s33 offset:244
-; DAGISEL64-NEXT:    scratch_load_b32 v85, off, s33 offset:248
-; DAGISEL64-NEXT:    scratch_load_b32 v86, off, s33 offset:252
-; DAGISEL64-NEXT:    scratch_load_b32 v87, off, s33 offset:256
+; DAGISEL64-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; DAGISEL64-NEXT:    scratch_load_b32 v33, off, s32 offset:132
+; DAGISEL64-NEXT:    scratch_load_b32 v34, off, s32 offset:136
+; DAGISEL64-NEXT:    scratch_load_b32 v35, off, s32 offset:140
+; DAGISEL64-NEXT:    scratch_load_b32 v36, off, s32 offset:144
+; DAGISEL64-NEXT:    scratch_load_b32 v37, off, s32 offset:148
+; DAGISEL64-NEXT:    scratch_load_b32 v38, off, s32 offset:152
+; DAGISEL64-NEXT:    scratch_load_b32 v39, off, s32 offset:156
+; DAGISEL64-NEXT:    scratch_load_b32 v48, off, s32 offset:160
+; DAGISEL64-NEXT:    scratch_load_b32 v49, off, s32 offset:164
+; DAGISEL64-NEXT:    scratch_load_b32 v50, off, s32 offset:168
+; DAGISEL64-NEXT:    scratch_load_b32 v51, off, s32 offset:172
+; DAGISEL64-NEXT:    scratch_load_b32 v52, off, s32 offset:176
+; DAGISEL64-NEXT:    scratch_load_b32 v53, off, s32 offset:180
+; DAGISEL64-NEXT:    scratch_load_b32 v54, off, s32 offset:184
+; DAGISEL64-NEXT:    scratch_load_b32 v55, off, s32 offset:188
+; DAGISEL64-NEXT:    scratch_load_b32 v64, off, s32 offset:192
+; DAGISEL64-NEXT:    scratch_load_b32 v65, off, s32 offset:196
+; DAGISEL64-NEXT:    scratch_load_b32 v66, off, s32 offset:200
+; DAGISEL64-NEXT:    scratch_load_b32 v67, off, s32 offset:204
+; DAGISEL64-NEXT:    scratch_load_b32 v68, off, s32 offset:208
+; DAGISEL64-NEXT:    scratch_load_b32 v69, off, s32 offset:212
+; DAGISEL64-NEXT:    scratch_load_b32 v70, off, s32 offset:216
+; DAGISEL64-NEXT:    scratch_load_b32 v71, off, s32 offset:220
+; DAGISEL64-NEXT:    scratch_load_b32 v80, off, s32 offset:224
+; DAGISEL64-NEXT:    scratch_load_b32 v81, off, s32 offset:228
+; DAGISEL64-NEXT:    scratch_load_b32 v82, off, s32 offset:232
+; DAGISEL64-NEXT:    scratch_load_b32 v83, off, s32 offset:236
+; DAGISEL64-NEXT:    scratch_load_b32 v84, off, s32 offset:240
+; DAGISEL64-NEXT:    scratch_load_b32 v85, off, s32 offset:244
+; DAGISEL64-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; DAGISEL64-NEXT:    scratch_load_b32 v87, off, s32 offset:252
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v96, off, s33 offset:260
-; DAGISEL64-NEXT:    scratch_load_b32 v97, off, s33 offset:264
-; DAGISEL64-NEXT:    scratch_load_b32 v98, off, s33 offset:268
-; DAGISEL64-NEXT:    scratch_load_b32 v99, off, s33 offset:272
-; DAGISEL64-NEXT:    scratch_load_b32 v100, off, s33 offset:276
-; DAGISEL64-NEXT:    scratch_load_b32 v101, off, s33 offset:280
-; DAGISEL64-NEXT:    scratch_load_b32 v102, off, s33 offset:284
-; DAGISEL64-NEXT:    scratch_load_b32 v103, off, s33 offset:288
-; DAGISEL64-NEXT:    scratch_load_b32 v112, off, s33 offset:292
-; DAGISEL64-NEXT:    scratch_load_b32 v113, off, s33 offset:296
-; DAGISEL64-NEXT:    scratch_load_b32 v114, off, s33 offset:300
-; DAGISEL64-NEXT:    scratch_load_b32 v115, off, s33 offset:304
-; DAGISEL64-NEXT:    scratch_load_b32 v116, off, s33 offset:308
-; DAGISEL64-NEXT:    scratch_load_b32 v117, off, s33 offset:312
-; DAGISEL64-NEXT:    scratch_load_b32 v118, off, s33 offset:316
-; DAGISEL64-NEXT:    scratch_load_b32 v119, off, s33 offset:320
-; DAGISEL64-NEXT:    scratch_load_b32 v128, off, s33 offset:324
-; DAGISEL64-NEXT:    scratch_load_b32 v129, off, s33 offset:328
-; DAGISEL64-NEXT:    scratch_load_b32 v130, off, s33 offset:332
-; DAGISEL64-NEXT:    scratch_load_b32 v131, off, s33 offset:336
-; DAGISEL64-NEXT:    scratch_load_b32 v132, off, s33 offset:340
-; DAGISEL64-NEXT:    scratch_load_b32 v133, off, s33 offset:344
-; DAGISEL64-NEXT:    scratch_load_b32 v134, off, s33 offset:348
-; DAGISEL64-NEXT:    scratch_load_b32 v135, off, s33 offset:352
-; DAGISEL64-NEXT:    scratch_load_b32 v144, off, s33 offset:356
-; DAGISEL64-NEXT:    scratch_load_b32 v145, off, s33 offset:360
-; DAGISEL64-NEXT:    scratch_load_b32 v146, off, s33 offset:364
-; DAGISEL64-NEXT:    scratch_load_b32 v147, off, s33 offset:368
-; DAGISEL64-NEXT:    scratch_load_b32 v148, off, s33 offset:372
-; DAGISEL64-NEXT:    scratch_load_b32 v149, off, s33 offset:376
-; DAGISEL64-NEXT:    scratch_load_b32 v150, off, s33 offset:380
-; DAGISEL64-NEXT:    scratch_load_b32 v151, off, s33 offset:384
+; DAGISEL64-NEXT:    scratch_load_b32 v96, off, s32 offset:256
+; DAGISEL64-NEXT:    scratch_load_b32 v97, off, s32 offset:260
+; DAGISEL64-NEXT:    scratch_load_b32 v98, off, s32 offset:264
+; DAGISEL64-NEXT:    scratch_load_b32 v99, off, s32 offset:268
+; DAGISEL64-NEXT:    scratch_load_b32 v100, off, s32 offset:272
+; DAGISEL64-NEXT:    scratch_load_b32 v101, off, s32 offset:276
+; DAGISEL64-NEXT:    scratch_load_b32 v102, off, s32 offset:280
+; DAGISEL64-NEXT:    scratch_load_b32 v103, off, s32 offset:284
+; DAGISEL64-NEXT:    scratch_load_b32 v112, off, s32 offset:288
+; DAGISEL64-NEXT:    scratch_load_b32 v113, off, s32 offset:292
+; DAGISEL64-NEXT:    scratch_load_b32 v114, off, s32 offset:296
+; DAGISEL64-NEXT:    scratch_load_b32 v115, off, s32 offset:300
+; DAGISEL64-NEXT:    scratch_load_b32 v116, off, s32 offset:304
+; DAGISEL64-NEXT:    scratch_load_b32 v117, off, s32 offset:308
+; DAGISEL64-NEXT:    scratch_load_b32 v118, off, s32 offset:312
+; DAGISEL64-NEXT:    scratch_load_b32 v119, off, s32 offset:316
+; DAGISEL64-NEXT:    scratch_load_b32 v128, off, s32 offset:320
+; DAGISEL64-NEXT:    scratch_load_b32 v129, off, s32 offset:324
+; DAGISEL64-NEXT:    scratch_load_b32 v130, off, s32 offset:328
+; DAGISEL64-NEXT:    scratch_load_b32 v131, off, s32 offset:332
+; DAGISEL64-NEXT:    scratch_load_b32 v132, off, s32 offset:336
+; DAGISEL64-NEXT:    scratch_load_b32 v133, off, s32 offset:340
+; DAGISEL64-NEXT:    scratch_load_b32 v134, off, s32 offset:344
+; DAGISEL64-NEXT:    scratch_load_b32 v135, off, s32 offset:348
+; DAGISEL64-NEXT:    scratch_load_b32 v144, off, s32 offset:352
+; DAGISEL64-NEXT:    scratch_load_b32 v145, off, s32 offset:356
+; DAGISEL64-NEXT:    scratch_load_b32 v146, off, s32 offset:360
+; DAGISEL64-NEXT:    scratch_load_b32 v147, off, s32 offset:364
+; DAGISEL64-NEXT:    scratch_load_b32 v148, off, s32 offset:368
+; DAGISEL64-NEXT:    scratch_load_b32 v149, off, s32 offset:372
+; DAGISEL64-NEXT:    scratch_load_b32 v150, off, s32 offset:376
+; DAGISEL64-NEXT:    scratch_load_b32 v151, off, s32 offset:380
 ; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v160, off, s33 offset:388
-; DAGISEL64-NEXT:    scratch_load_b32 v161, off, s33 offset:392
-; DAGISEL64-NEXT:    scratch_load_b32 v162, off, s33 offset:396
-; DAGISEL64-NEXT:    scratch_load_b32 v163, off, s33 offset:400
-; DAGISEL64-NEXT:    scratch_load_b32 v164, off, s33 offset:404
-; DAGISEL64-NEXT:    scratch_load_b32 v165, off, s33 offset:408
-; DAGISEL64-NEXT:    scratch_load_b32 v166, off, s33 offset:412
-; DAGISEL64-NEXT:    scratch_load_b32 v167, off, s33 offset:416
-; DAGISEL64-NEXT:    scratch_load_b32 v176, off, s33 offset:420
-; DAGISEL64-NEXT:    scratch_load_b32 v177, off, s33 offset:424
-; DAGISEL64-NEXT:    scratch_load_b32 v178, off, s33 offset:428
-; DAGISEL64-NEXT:    scratch_load_b32 v179, off, s33 offset:432
-; DAGISEL64-NEXT:    scratch_load_b32 v180, off, s33 offset:436
-; DAGISEL64-NEXT:    scratch_load_b32 v181, off, s33 offset:440
-; DAGISEL64-NEXT:    scratch_load_b32 v182, off, s33 offset:444
-; DAGISEL64-NEXT:    scratch_load_b32 v183, off, s33 offset:448
-; DAGISEL64-NEXT:    scratch_load_b32 v192, off, s33 offset:452
-; DAGISEL64-NEXT:    scratch_load_b32 v193, off, s33 offset:456
-; DAGISEL64-NEXT:    scratch_load_b32 v194, off, s33 offset:460
-; DAGISEL64-NEXT:    scratch_load_b32 v195, off, s33 offset:464
-; DAGISEL64-NEXT:    scratch_load_b32 v196, off, s33 offset:468
-; DAGISEL64-NEXT:    scratch_load_b32 v197, off, s33 offset:472
-; DAGISEL64-NEXT:    scratch_load_b32 v198, off, s33 offset:476
-; DAGISEL64-NEXT:    scratch_load_b32 v199, off, s33 offset:480
-; DAGISEL64-NEXT:    scratch_load_b32 v208, off, s33 offset:484
-; DAGISEL64-NEXT:    scratch_load_b32 v209, off, s33 offset:488
-; DAGISEL64-NEXT:    scratch_load_b32 v210, off, s33 offset:492
-; DAGISEL64-NEXT:    scratch_load_b32 v211, off, s33 offset:496
-; DAGISEL64-NEXT:    scratch_load_b32 v212, off, s33 offset:500
-; DAGISEL64-NEXT:    scratch_load_b32 v213, off, s33 offset:504
-; DAGISEL64-NEXT:    scratch_load_b32 v214, off, s33 offset:508
-; DAGISEL64-NEXT:    scratch_load_b32 v215, off, s33 offset:512
+; DAGISEL64-NEXT:    scratch_load_b32 v160, off, s32 offset:384
+; DAGISEL64-NEXT:    scratch_load_b32 v161, off, s32 offset:388
+; DAGISEL64-NEXT:    scratch_load_b32 v162, off, s32 offset:392
+; DAGISEL64-NEXT:    scratch_load_b32 v163, off, s32 offset:396
+; DAGISEL64-NEXT:    scratch_load_b32 v164, off, s32 offset:400
+; DAGISEL64-NEXT:    scratch_load_b32 v165, off, s32 offset:404
+; DAGISEL64-NEXT:    scratch_load_b32 v166, off, s32 offset:408
+; DAGISEL64-NEXT:    scratch_load_b32 v167, off, s32 offset:412
+; DAGISEL64-NEXT:    scratch_load_b32 v176, off, s32 offset:416
+; DAGISEL64-NEXT:    scratch_load_b32 v177, off, s32 offset:420
+; DAGISEL64-NEXT:    scratch_load_b32 v178, off, s32 offset:424
+; DAGISEL64-NEXT:    scratch_load_b32 v179, off, s32 offset:428
+; DAGISEL64-NEXT:    scratch_load_b32 v180, off, s32 offset:432
+; DAGISEL64-NEXT:    scratch_load_b32 v181, off, s32 offset:436
+; DAGISEL64-NEXT:    scratch_load_b32 v182, off, s32 offset:440
+; DAGISEL64-NEXT:    scratch_load_b32 v183, off, s32 offset:444
+; DAGISEL64-NEXT:    scratch_load_b32 v192, off, s32 offset:448
+; DAGISEL64-NEXT:    scratch_load_b32 v193, off, s32 offset:452
+; DAGISEL64-NEXT:    scratch_load_b32 v194, off, s32 offset:456
+; DAGISEL64-NEXT:    scratch_load_b32 v195, off, s32 offset:460
+; DAGISEL64-NEXT:    scratch_load_b32 v196, off, s32 offset:464
+; DAGISEL64-NEXT:    scratch_load_b32 v197, off, s32 offset:468
+; DAGISEL64-NEXT:    scratch_load_b32 v198, off, s32 offset:472
+; DAGISEL64-NEXT:    scratch_load_b32 v199, off, s32 offset:476
+; DAGISEL64-NEXT:    scratch_load_b32 v208, off, s32 offset:480
+; DAGISEL64-NEXT:    scratch_load_b32 v209, off, s32 offset:484
+; DAGISEL64-NEXT:    scratch_load_b32 v210, off, s32 offset:488
+; DAGISEL64-NEXT:    scratch_load_b32 v211, off, s32 offset:492
+; DAGISEL64-NEXT:    scratch_load_b32 v212, off, s32 offset:496
+; DAGISEL64-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; DAGISEL64-NEXT:    scratch_load_b32 v214, off, s32 offset:504
+; DAGISEL64-NEXT:    scratch_load_b32 v215, off, s32 offset:508
 ; DAGISEL64-NEXT:    s_clause 0xf
-; DAGISEL64-NEXT:    scratch_load_b32 v224, off, s33 offset:516
-; DAGISEL64-NEXT:    scratch_load_b32 v225, off, s33 offset:520
-; DAGISEL64-NEXT:    scratch_load_b32 v226, off, s33 offset:524
-; DAGISEL64-NEXT:    scratch_load_b32 v227, off, s33 offset:528
-; DAGISEL64-NEXT:    scratch_load_b32 v228, off, s33 offset:532
-; DAGISEL64-NEXT:    scratch_load_b32 v229, off, s33 offset:536
-; DAGISEL64-NEXT:    scratch_load_b32 v230, off, s33 offset:540
-; DAGISEL64-NEXT:    scratch_load_b32 v231, off, s33 offset:544
-; DAGISEL64-NEXT:    scratch_load_b32 v240, off, s33 offset:548
-; DAGISEL64-NEXT:    scratch_load_b32 v241, off, s33 offset:552
-; DAGISEL64-NEXT:    scratch_load_b32 v242, off, s33 offset:556
-; DAGISEL64-NEXT:    scratch_load_b32 v243, off, s33 offset:560
-; DAGISEL64-NEXT:    scratch_load_b32 v244, off, s33 offset:564
-; DAGISEL64-NEXT:    scratch_load_b32 v245, off, s33 offset:568
-; DAGISEL64-NEXT:    scratch_load_b32 v246, off, s33 offset:572
-; DAGISEL64-NEXT:    scratch_load_b32 v247, off, s33 offset:576
-; DAGISEL64-NEXT:    s_mov_b64 exec, s[34:35]
-; DAGISEL64-NEXT:    s_mov_b32 s33, s36
-; DAGISEL64-NEXT:    s_wait_loadcnt 0x0
-; DAGISEL64-NEXT:    s_wait_alu 0xfffe
-; DAGISEL64-NEXT:    s_setpc_b64 s[30:31]
+; DAGISEL64-NEXT:    scratch_load_b32 v224, off, s32 offset:512
+; DAGISEL64-NEXT:    scratch_load_b32 v225, off, s32 offset:516
+; DAGISEL64-NEXT:    scratch_load_b32 v226, off, s32 offset:520
+; DAGISEL64-NEXT:    scratch_load_b32 v227, off, s32 offset:524
+; DAGISEL64-NEXT:    scratch_load_b32 v228, off, s32 offset:528
+; DAGISEL64-NEXT:    scratch_load_b32 v229, off, s32 offset:532
+; DAGISEL64-NEXT:    scratch_load_b32 v230, off, s32 offset:536
+; DAGISEL64-NEXT:    scratch_load_b32 v231, off, s32 offset:540
+; DAGISEL64-NEXT:    scratch_load_b32 v240, off, s32 offset:544
+; DAGISEL64-NEXT:    scratch_load_b32 v241, off, s32 offset:548
+; DAGISEL64-NEXT:    scratch_load_b32 v242, off, s32 offset:552
+; DAGISEL64-NEXT:    scratch_load_b32 v243, off, s32 offset:556
+; DAGISEL64-NEXT:    scratch_load_b32 v244, off, s32 offset:560
+; DAGISEL64-NEXT:    scratch_load_b32 v245, off, s32 offset:564
+; DAGISEL64-NEXT:    scratch_load_b32 v246, off, s32 offset:568
+; DAGISEL64-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; DAGISEL64-NEXT:    s_mov_b64 exec, s[0:1]
+; DAGISEL64-NEXT:    s_setpc_b64 s[36:37]
 ;
 ; GISEL64-LABEL: tail_call_gfx_from_whole_wave:
 ; GISEL64:       ; %bb.0:
@@ -3676,415 +3370,315 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
 ; GISEL64-NEXT:    s_wait_samplecnt 0x0
 ; GISEL64-NEXT:    s_wait_bvhcnt 0x0
 ; GISEL64-NEXT:    s_wait_kmcnt 0x0
-; GISEL64-NEXT:    s_mov_b32 s36, s33
-; GISEL64-NEXT:    s_mov_b32 s33, s32
-; GISEL64-NEXT:    s_xor_saveexec_b64 s[34:35], -1
+; GISEL64-NEXT:    s_xor_saveexec_b64 s[0:1], -1
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; GISEL64-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; GISEL64-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; GISEL64-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; GISEL64-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; GISEL64-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; GISEL64-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; GISEL64-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; GISEL64-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; GISEL64-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; GISEL64-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; GISEL64-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; GISEL64-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; GISEL64-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; GISEL64-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; GISEL64-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; GISEL64-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; GISEL64-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; GISEL64-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; GISEL64-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; GISEL64-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; GISEL64-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; GISEL64-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; GISEL64-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; GISEL64-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; GISEL64-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; GISEL64-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; GISEL64-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; GISEL64-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; GISEL64-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; GISEL64-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; GISEL64-NEXT:    scratch_store_b32 off, v31, s33 offset:128
+; GISEL64-NEXT:    scratch_store_b32 off, v0, s32
+; GISEL64-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; GISEL64-NEXT:    scratch_store_b32 off, v2, s32 offset:8
+; GISEL64-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; GISEL64-NEXT:    scratch_store_b32 off, v4, s32 offset:16
+; GISEL64-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; GISEL64-NEXT:    scratch_store_b32 off, v6, s32 offset:24
+; GISEL64-NEXT:    scratch_store_b32 off, v7, s32 offset:28
+; GISEL64-NEXT:    scratch_store_b32 off, v8, s32 offset:32
+; GISEL64-NEXT:    scratch_store_b32 off, v9, s32 offset:36
+; GISEL64-NEXT:    scratch_store_b32 off, v10, s32 offset:40
+; GISEL64-NEXT:    scratch_store_b32 off, v11, s32 offset:44
+; GISEL64-NEXT:    scratch_store_b32 off, v12, s32 offset:48
+; GISEL64-NEXT:    scratch_store_b32 off, v13, s32 offset:52
+; GISEL64-NEXT:    scratch_store_b32 off, v14, s32 offset:56
+; GISEL64-NEXT:    scratch_store_b32 off, v15, s32 offset:60
+; GISEL64-NEXT:    scratch_store_b32 off, v16, s32 offset:64
+; GISEL64-NEXT:    scratch_store_b32 off, v17, s32 offset:68
+; GISEL64-NEXT:    scratch_store_b32 off, v18, s32 offset:72
+; GISEL64-NEXT:    scratch_store_b32 off, v19, s32 offset:76
+; GISEL64-NEXT:    scratch_store_b32 off, v20, s32 offset:80
+; GISEL64-NEXT:    scratch_store_b32 off, v21, s32 offset:84
+; GISEL64-NEXT:    scratch_store_b32 off, v22, s32 offset:88
+; GISEL64-NEXT:    scratch_store_b32 off, v23, s32 offset:92
+; GISEL64-NEXT:    scratch_store_b32 off, v24, s32 offset:96
+; GISEL64-NEXT:    scratch_store_b32 off, v25, s32 offset:100
+; GISEL64-NEXT:    scratch_store_b32 off, v26, s32 offset:104
+; GISEL64-NEXT:    scratch_store_b32 off, v27, s32 offset:108
+; GISEL64-NEXT:    scratch_store_b32 off, v28, s32 offset:112
+; GISEL64-NEXT:    scratch_store_b32 off, v29, s32 offset:116
+; GISEL64-NEXT:    scratch_store_b32 off, v30, s32 offset:120
+; GISEL64-NEXT:    scratch_store_b32 off, v31, s32 offset:124
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; GISEL64-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; GISEL64-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; GISEL64-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; GISEL64-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; GISEL64-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; GISEL64-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; GISEL64-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; GISEL64-NEXT:    scratch_store_b32 off, v48, s33 offset:164
-; GISEL64-NEXT:    scratch_store_b32 off, v49, s33 offset:168
-; GISEL64-NEXT:    scratch_store_b32 off, v50, s33 offset:172
-; GISEL64-NEXT:    scratch_store_b32 off, v51, s33 offset:176
-; GISEL64-NEXT:    scratch_store_b32 off, v52, s33 offset:180
-; GISEL64-NEXT:    scratch_store_b32 off, v53, s33 offset:184
-; GISEL64-NEXT:    scratch_store_b32 off, v54, s33 offset:188
-; GISEL64-NEXT:    scratch_store_b32 off, v55, s33 offset:192
-; GISEL64-NEXT:    scratch_store_b32 off, v64, s33 offset:196
-; GISEL64-NEXT:    scratch_store_b32 off, v65, s33 offset:200
-; GISEL64-NEXT:    scratch_store_b32 off, v66, s33 offset:204
-; GISEL64-NEXT:    scratch_store_b32 off, v67, s33 offset:208
-; GISEL64-NEXT:    scratch_store_b32 off, v68, s33 offset:212
-; GISEL64-NEXT:    scratch_store_b32 off, v69, s33 offset:216
-; GISEL64-NEXT:    scratch_store_b32 off, v70, s33 offset:220
-; GISEL64-NEXT:    scratch_store_b32 off, v71, s33 offset:224
-; GISEL64-NEXT:    scratch_store_b32 off, v80, s33 offset:228
-; GISEL64-NEXT:    scratch_store_b32 off, v81, s33 offset:232
-; GISEL64-NEXT:    scratch_store_b32 off, v82, s33 offset:236
-; GISEL64-NEXT:    scratch_store_b32 off, v83, s33 offset:240
-; GISEL64-NEXT:    scratch_store_b32 off, v84, s33 offset:244
-; GISEL64-NEXT:    scratch_store_b32 off, v85, s33 offset:248
-; GISEL64-NEXT:    scratch_store_b32 off, v86, s33 offset:252
-; GISEL64-NEXT:    scratch_store_b32 off, v87, s33 offset:256
+; GISEL64-NEXT:    scratch_store_b32 off, v32, s32 offset:128
+; GISEL64-NEXT:    scratch_store_b32 off, v33, s32 offset:132
+; GISEL64-NEXT:    scratch_store_b32 off, v34, s32 offset:136
+; GISEL64-NEXT:    scratch_store_b32 off, v35, s32 offset:140
+; GISEL64-NEXT:    scratch_store_b32 off, v36, s32 offset:144
+; GISEL64-NEXT:    scratch_store_b32 off, v37, s32 offset:148
+; GISEL64-NEXT:    scratch_store_b32 off, v38, s32 offset:152
+; GISEL64-NEXT:    scratch_store_b32 off, v39, s32 offset:156
+; GISEL64-NEXT:    scratch_store_b32 off, v48, s32 offset:160
+; GISEL64-NEXT:    scratch_store_b32 off, v49, s32 offset:164
+; GISEL64-NEXT:    scratch_store_b32 off, v50, s32 offset:168
+; GISEL64-NEXT:    scratch_store_b32 off, v51, s32 offset:172
+; GISEL64-NEXT:    scratch_store_b32 off, v52, s32 offset:176
+; GISEL64-NEXT:    scratch_store_b32 off, v53, s32 offset:180
+; GISEL64-NEXT:    scratch_store_b32 off, v54, s32 offset:184
+; GISEL64-NEXT:    scratch_store_b32 off, v55, s32 offset:188
+; GISEL64-NEXT:    scratch_store_b32 off, v64, s32 offset:192
+; GISEL64-NEXT:    scratch_store_b32 off, v65, s32 offset:196
+; GISEL64-NEXT:    scratch_store_b32 off, v66, s32 offset:200
+; GISEL64-NEXT:    scratch_store_b32 off, v67, s32 offset:204
+; GISEL64-NEXT:    scratch_store_b32 off, v68, s32 offset:208
+; GISEL64-NEXT:    scratch_store_b32 off, v69, s32 offset:212
+; GISEL64-NEXT:    scratch_store_b32 off, v70, s32 offset:216
+; GISEL64-NEXT:    scratch_store_b32 off, v71, s32 offset:220
+; GISEL64-NEXT:    scratch_store_b32 off, v80, s32 offset:224
+; GISEL64-NEXT:    scratch_store_b32 off, v81, s32 offset:228
+; GISEL64-NEXT:    scratch_store_b32 off, v82, s32 offset:232
+; GISEL64-NEXT:    scratch_store_b32 off, v83, s32 offset:236
+; GISEL64-NEXT:    scratch_store_b32 off, v84, s32 offset:240
+; GISEL64-NEXT:    scratch_store_b32 off, v85, s32 offset:244
+; GISEL64-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; GISEL64-NEXT:    scratch_store_b32 off, v87, s32 offset:252
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v96, s33 offset:260
-; GISEL64-NEXT:    scratch_store_b32 off, v97, s33 offset:264
-; GISEL64-NEXT:    scratch_store_b32 off, v98, s33 offset:268
-; GISEL64-NEXT:    scratch_store_b32 off, v99, s33 offset:272
-; GISEL64-NEXT:    scratch_store_b32 off, v100, s33 offset:276
-; GISEL64-NEXT:    scratch_store_b32 off, v101, s33 offset:280
-; GISEL64-NEXT:    scratch_store_b32 off, v102, s33 offset:284
-; GISEL64-NEXT:    scratch_store_b32 off, v103, s33 offset:288
-; GISEL64-NEXT:    scratch_store_b32 off, v112, s33 offset:292
-; GISEL64-NEXT:    scratch_store_b32 off, v113, s33 offset:296
-; GISEL64-NEXT:    scratch_store_b32 off, v114, s33 offset:300
-; GISEL64-NEXT:    scratch_store_b32 off, v115, s33 offset:304
-; GISEL64-NEXT:    scratch_store_b32 off, v116, s33 offset:308
-; GISEL64-NEXT:    scratch_store_b32 off, v117, s33 offset:312
-; GISEL64-NEXT:    scratch_store_b32 off, v118, s33 offset:316
-; GISEL64-NEXT:    scratch_store_b32 off, v119, s33 offset:320
-; GISEL64-NEXT:    scratch_store_b32 off, v128, s33 offset:324
-; GISEL64-NEXT:    scratch_store_b32 off, v129, s33 offset:328
-; GISEL64-NEXT:    scratch_store_b32 off, v130, s33 offset:332
-; GISEL64-NEXT:    scratch_store_b32 off, v131, s33 offset:336
-; GISEL64-NEXT:    scratch_store_b32 off, v132, s33 offset:340
-; GISEL64-NEXT:    scratch_store_b32 off, v133, s33 offset:344
-; GISEL64-NEXT:    scratch_store_b32 off, v134, s33 offset:348
-; GISEL64-NEXT:    scratch_store_b32 off, v135, s33 offset:352
-; GISEL64-NEXT:    scratch_store_b32 off, v144, s33 offset:356
-; GISEL64-NEXT:    scratch_store_b32 off, v145, s33 offset:360
-; GISEL64-NEXT:    scratch_store_b32 off, v146, s33 offset:364
-; GISEL64-NEXT:    scratch_store_b32 off, v147, s33 offset:368
-; GISEL64-NEXT:    scratch_store_b32 off, v148, s33 offset:372
-; GISEL64-NEXT:    scratch_store_b32 off, v149, s33 offset:376
-; GISEL64-NEXT:    scratch_store_b32 off, v150, s33 offset:380
-; GISEL64-NEXT:    scratch_store_b32 off, v151, s33 offset:384
+; GISEL64-NEXT:    scratch_store_b32 off, v96, s32 offset:256
+; GISEL64-NEXT:    scratch_store_b32 off, v97, s32 offset:260
+; GISEL64-NEXT:    scratch_store_b32 off, v98, s32 offset:264
+; GISEL64-NEXT:    scratch_store_b32 off, v99, s32 offset:268
+; GISEL64-NEXT:    scratch_store_b32 off, v100, s32 offset:272
+; GISEL64-NEXT:    scratch_store_b32 off, v101, s32 offset:276
+; GISEL64-NEXT:    scratch_store_b32 off, v102, s32 offset:280
+; GISEL64-NEXT:    scratch_store_b32 off, v103, s32 offset:284
+; GISEL64-NEXT:    scratch_store_b32 off, v112, s32 offset:288
+; GISEL64-NEXT:    scratch_store_b32 off, v113, s32 offset:292
+; GISEL64-NEXT:    scratch_store_b32 off, v114, s32 offset:296
+; GISEL64-NEXT:    scratch_store_b32 off, v115, s32 offset:300
+; GISEL64-NEXT:    scratch_store_b32 off, v116, s32 offset:304
+; GISEL64-NEXT:    scratch_store_b32 off, v117, s32 offset:308
+; GISEL64-NEXT:    scratch_store_b32 off, v118, s32 offset:312
+; GISEL64-NEXT:    scratch_store_b32 off, v119, s32 offset:316
+; GISEL64-NEXT:    scratch_store_b32 off, v128, s32 offset:320
+; GISEL64-NEXT:    scratch_store_b32 off, v129, s32 offset:324
+; GISEL64-NEXT:    scratch_store_b32 off, v130, s32 offset:328
+; GISEL64-NEXT:    scratch_store_b32 off, v131, s32 offset:332
+; GISEL64-NEXT:    scratch_store_b32 off, v132, s32 offset:336
+; GISEL64-NEXT:    scratch_store_b32 off, v133, s32 offset:340
+; GISEL64-NEXT:    scratch_store_b32 off, v134, s32 offset:344
+; GISEL64-NEXT:    scratch_store_b32 off, v135, s32 offset:348
+; GISEL64-NEXT:    scratch_store_b32 off, v144, s32 offset:352
+; GISEL64-NEXT:    scratch_store_b32 off, v145, s32 offset:356
+; GISEL64-NEXT:    scratch_store_b32 off, v146, s32 offset:360
+; GISEL64-NEXT:    scratch_store_b32 off, v147, s32 offset:364
+; GISEL64-NEXT:    scratch_store_b32 off, v148, s32 offset:368
+; GISEL64-NEXT:    scratch_store_b32 off, v149, s32 offset:372
+; GISEL64-NEXT:    scratch_store_b32 off, v150, s32 offset:376
+; GISEL64-NEXT:    scratch_store_b32 off, v151, s32 offset:380
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v160, s33 offset:388
-; GISEL64-NEXT:    scratch_store_b32 off, v161, s33 offset:392
-; GISEL64-NEXT:    scratch_store_b32 off, v162, s33 offset:396
-; GISEL64-NEXT:    scratch_store_b32 off, v163, s33 offset:400
-; GISEL64-NEXT:    scratch_store_b32 off, v164, s33 offset:404
-; GISEL64-NEXT:    scratch_store_b32 off, v165, s33 offset:408
-; GISEL64-NEXT:    scratch_store_b32 off, v166, s33 offset:412
-; GISEL64-NEXT:    scratch_store_b32 off, v167, s33 offset:416
-; GISEL64-NEXT:    scratch_store_b32 off, v176, s33 offset:420
-; GISEL64-NEXT:    scratch_store_b32 off, v177, s33 offset:424
-; GISEL64-NEXT:    scratch_store_b32 off, v178, s33 offset:428
-; GISEL64-NEXT:    scratch_store_b32 off, v179, s33 offset:432
-; GISEL64-NEXT:    scratch_store_b32 off, v180, s33 offset:436
-; GISEL64-NEXT:    scratch_store_b32 off, v181, s33 offset:440
-; GISEL64-NEXT:    scratch_store_b32 off, v182, s33 offset:444
-; GISEL64-NEXT:    scratch_store_b32 off, v183, s33 offset:448
-; GISEL64-NEXT:    scratch_store_b32 off, v192, s33 offset:452
-; GISEL64-NEXT:    scratch_store_b32 off, v193, s33 offset:456
-; GISEL64-NEXT:    scratch_store_b32 off, v194, s33 offset:460
-; GISEL64-NEXT:    scratch_store_b32 off, v195, s33 offset:464
-; GISEL64-NEXT:    scratch_store_b32 off, v196, s33 offset:468
-; GISEL64-NEXT:    scratch_store_b32 off, v197, s33 offset:472
-; GISEL64-NEXT:    scratch_store_b32 off, v198, s33 offset:476
-; GISEL64-NEXT:    scratch_store_b32 off, v199, s33 offset:480
-; GISEL64-NEXT:    scratch_store_b32 off, v208, s33 offset:484
-; GISEL64-NEXT:    scratch_store_b32 off, v209, s33 offset:488
-; GISEL64-NEXT:    scratch_store_b32 off, v210, s33 offset:492
-; GISEL64-NEXT:    scratch_store_b32 off, v211, s33 offset:496
-; GISEL64-NEXT:    scratch_store_b32 off, v212, s33 offset:500
-; GISEL64-NEXT:    scratch_store_b32 off, v213, s33 offset:504
-; GISEL64-NEXT:    scratch_store_b32 off, v214, s33 offset:508
-; GISEL64-NEXT:    scratch_store_b32 off, v215, s33 offset:512
+; GISEL64-NEXT:    scratch_store_b32 off, v160, s32 offset:384
+; GISEL64-NEXT:    scratch_store_b32 off, v161, s32 offset:388
+; GISEL64-NEXT:    scratch_store_b32 off, v162, s32 offset:392
+; GISEL64-NEXT:    scratch_store_b32 off, v163, s32 offset:396
+; GISEL64-NEXT:    scratch_store_b32 off, v164, s32 offset:400
+; GISEL64-NEXT:    scratch_store_b32 off, v165, s32 offset:404
+; GISEL64-NEXT:    scratch_store_b32 off, v166, s32 offset:408
+; GISEL64-NEXT:    scratch_store_b32 off, v167, s32 offset:412
+; GISEL64-NEXT:    scratch_store_b32 off, v176, s32 offset:416
+; GISEL64-NEXT:    scratch_store_b32 off, v177, s32 offset:420
+; GISEL64-NEXT:    scratch_store_b32 off, v178, s32 offset:424
+; GISEL64-NEXT:    scratch_store_b32 off, v179, s32 offset:428
+; GISEL64-NEXT:    scratch_store_b32 off, v180, s32 offset:432
+; GISEL64-NEXT:    scratch_store_b32 off, v181, s32 offset:436
+; GISEL64-NEXT:    scratch_store_b32 off, v182, s32 offset:440
+; GISEL64-NEXT:    scratch_store_b32 off, v183, s32 offset:444
+; GISEL64-NEXT:    scratch_store_b32 off, v192, s32 offset:448
+; GISEL64-NEXT:    scratch_store_b32 off, v193, s32 offset:452
+; GISEL64-NEXT:    scratch_store_b32 off, v194, s32 offset:456
+; GISEL64-NEXT:    scratch_store_b32 off, v195, s32 offset:460
+; GISEL64-NEXT:    scratch_store_b32 off, v196, s32 offset:464
+; GISEL64-NEXT:    scratch_store_b32 off, v197, s32 offset:468
+; GISEL64-NEXT:    scratch_store_b32 off, v198, s32 offset:472
+; GISEL64-NEXT:    scratch_store_b32 off, v199, s32 offset:476
+; GISEL64-NEXT:    scratch_store_b32 off, v208, s32 offset:480
+; GISEL64-NEXT:    scratch_store_b32 off, v209, s32 offset:484
+; GISEL64-NEXT:    scratch_store_b32 off, v210, s32 offset:488
+; GISEL64-NEXT:    scratch_store_b32 off, v211, s32 offset:492
+; GISEL64-NEXT:    scratch_store_b32 off, v212, s32 offset:496
+; GISEL64-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; GISEL64-NEXT:    scratch_store_b32 off, v214, s32 offset:504
+; GISEL64-NEXT:    scratch_store_b32 off, v215, s32 offset:508
 ; GISEL64-NEXT:    s_clause 0xf
-; GISEL64-NEXT:    scratch_store_b32 off, v224, s33 offset:516
-; GISEL64-NEXT:    scratch_store_b32 off, v225, s33 offset:520
-; GISEL64-NEXT:    scratch_store_b32 off, v226, s33 offset:524
-; GISEL64-NEXT:    scratch_store_b32 off, v227, s33 offset:528
-; GISEL64-NEXT:    scratch_store_b32 off, v228, s33 offset:532
-; GISEL64-NEXT:    scratch_store_b32 off, v229, s33 offset:536
-; GISEL64-NEXT:    scratch_store_b32 off, v230, s33 offset:540
-; GISEL64-NEXT:    scratch_store_b32 off, v231, s33 offset:544
-; GISEL64-NEXT:    scratch_store_b32 off, v240, s33 offset:548
-; GISEL64-NEXT:    scratch_store_b32 off, v241, s33 offset:552
-; GISEL64-NEXT:    scratch_store_b32 off, v242, s33 offset:556
-; GISEL64-NEXT:    scratch_store_b32 off, v243, s33 offset:560
-; GISEL64-NEXT:    scratch_store_b32 off, v244, s33 offset:564
-; GISEL64-NEXT:    scratch_store_b32 off, v245, s33 offset:568
-; GISEL64-NEXT:    scratch_store_b32 off, v246, s33 offset:572
-; GISEL64-NEXT:    scratch_store_b32 off, v247, s33 offset:576
+; GISEL64-NEXT:    scratch_store_b32 off, v224, s32 offset:512
+; GISEL64-NEXT:    scratch_store_b32 off, v225, s32 offset:516
+; GISEL64-NEXT:    scratch_store_b32 off, v226, s32 offset:520
+; GISEL64-NEXT:    scratch_store_b32 off, v227, s32 offset:524
+; GISEL64-NEXT:    scratch_store_b32 off, v228, s32 offset:528
+; GISEL64-NEXT:    scratch_store_b32 off, v229, s32 offset:532
+; GISEL64-NEXT:    scratch_store_b32 off, v230, s32 offset:536
+; GISEL64-NEXT:    scratch_store_b32 off, v231, s32 offset:540
+; GISEL64-NEXT:    scratch_store_b32 off, v240, s32 offset:544
+; GISEL64-NEXT:    scratch_store_b32 off, v241, s32 offset:548
+; GISEL64-NEXT:    scratch_store_b32 off, v242, s32 offset:552
+; GISEL64-NEXT:    scratch_store_b32 off, v243, s32 offset:556
+; GISEL64-NEXT:    scratch_store_b32 off, v244, s32 offset:560
+; GISEL64-NEXT:    scratch_store_b32 off, v245, s32 offset:564
+; GISEL64-NEXT:    scratch_store_b32 off, v246, s32 offset:568
+; GISEL64-NEXT:    scratch_store_b32 off, v247, s32 offset:572
 ; GISEL64-NEXT:    s_mov_b64 exec, -1
-; GISEL64-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GISEL64-NEXT:    v_writelane_b32 v40, s4, 0
 ; GISEL64-NEXT:    v_mov_b32_e32 v2, v0
 ; GISEL64-NEXT:    v_swap_b32 v0, v1
-; GISEL64-NEXT:    s_mov_b32 s0, gfx_callee@abs32@lo
-; GISEL64-NEXT:    v_writelane_b32 v40, s5, 1
-; GISEL64-NEXT:    s_mov_b32 s1, gfx_callee@abs32@hi
-; GISEL64-NEXT:    s_addk_co_i32 s32, 0x250
-; GISEL64-NEXT:    v_writelane_b32 v40, s6, 2
-; GISEL64-NEXT:    v_writelane_b32 v40, s7, 3
-; GISEL64-NEXT:    v_writelane_b32 v40, s8, 4
-; GISEL64-NEXT:    v_writelane_b32 v40, s9, 5
-; GISEL64-NEXT:    s_mov_b64 s[8:9], 0
-; GISEL64-NEXT:    v_writelane_b32 v40, s10, 6
-; GISEL64-NEXT:    v_writelane_b32 v40, s11, 7
-; GISEL64-NEXT:    v_writelane_b32 v40, s12, 8
-; GISEL64-NEXT:    v_writelane_b32 v40, s13, 9
-; GISEL64-NEXT:    v_writelane_b32 v40, s14, 10
-; GISEL64-NEXT:    v_writelane_b32 v40, s15, 11
-; GISEL64-NEXT:    v_writelane_b32 v40, s16, 12
-; GISEL64-NEXT:    v_writelane_b32 v40, s17, 13
-; GISEL64-NEXT:    v_writelane_b32 v40, s18, 14
-; GISEL64-NEXT:    v_writelane_b32 v40, s19, 15
-; GISEL64-NEXT:    v_writelane_b32 v40, s20, 16
-; GISEL64-NEXT:    v_writelane_b32 v40, s21, 17
-; GISEL64-NEXT:    v_writelane_b32 v40, s22, 18
-; GISEL64-NEXT:    v_writelane_b32 v40, s23, 19
-; GISEL64-NEXT:    v_writelane_b32 v40, s24, 20
-; GISEL64-NEXT:    v_writelane_b32 v40, s25, 21
-; GISEL64-NEXT:    v_writelane_b32 v40, s26, 22
-; GISEL64-NEXT:    v_writelane_b32 v40, s27, 23
-; GISEL64-NEXT:    v_writelane_b32 v40, s28, 24
-; GISEL64-NEXT:    v_writelane_b32 v40, s29, 25
-; GISEL64-NEXT:    v_writelane_b32 v40, s30, 26
-; GISEL64-NEXT:    v_writelane_b32 v40, s31, 27
-; GISEL64-NEXT:    v_writelane_b32 v40, s72, 28
-; GISEL64-NEXT:    v_writelane_b32 v40, s73, 29
-; GISEL64-NEXT:    v_writelane_b32 v40, s74, 30
-; GISEL64-NEXT:    v_writelane_b32 v40, s75, 31
-; GISEL64-NEXT:    v_writelane_b32 v40, s76, 32
-; GISEL64-NEXT:    v_writelane_b32 v40, s77, 33
-; GISEL64-NEXT:    v_writelane_b32 v40, s78, 34
-; GISEL64-NEXT:    v_writelane_b32 v40, s79, 35
-; GISEL64-NEXT:    v_writelane_b32 v40, s88, 36
-; GISEL64-NEXT:    v_writelane_b32 v40, s89, 37
-; GISEL64-NEXT:    v_writelane_b32 v40, s90, 38
-; GISEL64-NEXT:    v_writelane_b32 v40, s91, 39
-; GISEL64-NEXT:    v_writelane_b32 v40, s92, 40
-; GISEL64-NEXT:    v_writelane_b32 v40, s93, 41
-; GISEL64-NEXT:    v_writelane_b32 v40, s94, 42
-; GISEL64-NEXT:    v_writelane_b32 v40, s95, 43
+; GISEL64-NEXT:    s_mov_b32 s36, gfx_callee@abs32@lo
+; GISEL64-NEXT:    s_mov_b32 s37, gfx_callee@abs32@hi
 ; GISEL64-NEXT:    s_wait_alu 0xfffe
-; GISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL64-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GISEL64-NEXT:    v_readlane_b32 s95, v40, 43
-; GISEL64-NEXT:    v_readlane_b32 s94, v40, 42
-; GISEL64-NEXT:    v_readlane_b32 s93, v40, 41
-; GISEL64-NEXT:    v_readlane_b32 s92, v40, 40
-; GISEL64-NEXT:    v_readlane_b32 s91, v40, 39
-; GISEL64-NEXT:    v_readlane_b32 s90, v40, 38
-; GISEL64-NEXT:    v_readlane_b32 s89, v40, 37
-; GISEL64-NEXT:    v_readlane_b32 s88, v40, 36
-; GISEL64-NEXT:    v_readlane_b32 s79, v40, 35
-; GISEL64-NEXT:    v_readlane_b32 s78, v40, 34
-; GISEL64-NEXT:    v_readlane_b32 s77, v40, 33
-; GISEL64-NEXT:    v_readlane_b32 s76, v40, 32
-; GISEL64-NEXT:    v_readlane_b32 s75, v40, 31
-; GISEL64-NEXT:    v_readlane_b32 s74, v40, 30
-; GISEL64-NEXT:    v_readlane_b32 s73, v40, 29
-; GISEL64-NEXT:    v_readlane_b32 s72, v40, 28
-; GISEL64-NEXT:    v_readlane_b32 s31, v40, 27
-; GISEL64-NEXT:    v_readlane_b32 s30, v40, 26
-; GISEL64-NEXT:    v_readlane_b32 s29, v40, 25
-; GISEL64-NEXT:    v_readlane_b32 s28, v40, 24
-; GISEL64-NEXT:    v_readlane_b32 s27, v40, 23
-; GISEL64-NEXT:    v_readlane_b32 s26, v40, 22
-; GISEL64-NEXT:    v_readlane_b32 s25, v40, 21
-; GISEL64-NEXT:    v_readlane_b32 s24, v40, 20
-; GISEL64-NEXT:    v_readlane_b32 s23, v40, 19
-; GISEL64-NEXT:    v_readlane_b32 s22, v40, 18
-; GISEL64-NEXT:    v_readlane_b32 s21, v40, 17
-; GISEL64-NEXT:    v_readlane_b32 s20, v40, 16
-; GISEL64-NEXT:    v_readlane_b32 s19, v40, 15
-; GISEL64-NEXT:    v_readlane_b32 s18, v40, 14
-; GISEL64-NEXT:    v_readlane_b32 s17, v40, 13
-; GISEL64-NEXT:    v_readlane_b32 s16, v40, 12
-; GISEL64-NEXT:    v_readlane_b32 s15, v40, 11
-; GISEL64-NEXT:    v_readlane_b32 s14, v40, 10
-; GISEL64-NEXT:    v_readlane_b32 s13, v40, 9
-; GISEL64-NEXT:    v_readlane_b32 s12, v40, 8
-; GISEL64-NEXT:    v_readlane_b32 s11, v40, 7
-; GISEL64-NEXT:    v_readlane_b32 s10, v40, 6
-; GISEL64-NEXT:    v_readlane_b32 s9, v40, 5
-; GISEL64-NEXT:    v_readlane_b32 s8, v40, 4
-; GISEL64-NEXT:    v_readlane_b32 s7, v40, 3
-; GISEL64-NEXT:    v_readlane_b32 s6, v40, 2
-; GISEL64-NEXT:    v_readlane_b32 s5, v40, 1
-; GISEL64-NEXT:    v_readlane_b32 s4, v40, 0
-; GISEL64-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GISEL64-NEXT:    s_mov_b32 s32, s33
-; GISEL64-NEXT:    s_xor_b64 exec, s[34:35], -1
+; GISEL64-NEXT:    s_xor_b64 exec, s[0:1], -1
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; GISEL64-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; GISEL64-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; GISEL64-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; GISEL64-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; GISEL64-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; GISEL64-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; GISEL64-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; GISEL64-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; GISEL64-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; GISEL64-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; GISEL64-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; GISEL64-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; GISEL64-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; GISEL64-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; GISEL64-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; GISEL64-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; GISEL64-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; GISEL64-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; GISEL64-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; GISEL64-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; GISEL64-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; GISEL64-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; GISEL64-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; GISEL64-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; GISEL64-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; GISEL64-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; GISEL64-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; GISEL64-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; GISEL64-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; GISEL64-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; GISEL64-NEXT:    scratch_load_b32 v31, off, s33 offset:128
+; GISEL64-NEXT:    scratch_load_b32 v0, off, s32
+; GISEL64-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GISEL64-NEXT:    scratch_load_b32 v2, off, s32 offset:8
+; GISEL64-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; GISEL64-NEXT:    scratch_load_b32 v4, off, s32 offset:16
+; GISEL64-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; GISEL64-NEXT:    scratch_load_b32 v6, off, s32 offset:24
+; GISEL64-NEXT:    scratch_load_b32 v7, off, s32 offset:28
+; GISEL64-NEXT:    scratch_load_b32 v8, off, s32 offset:32
+; GISEL64-NEXT:    scratch_load_b32 v9, off, s32 offset:36
+; GISEL64-NEXT:    scratch_load_b32 v10, off, s32 offset:40
+; GISEL64-NEXT:    scratch_load_b32 v11, off, s32 offset:44
+; GISEL64-NEXT:    scratch_load_b32 v12, off, s32 offset:48
+; GISEL64-NEXT:    scratch_load_b32 v13, off, s32 offset:52
+; GISEL64-NEXT:    scratch_load_b32 v14, off, s32 offset:56
+; GISEL64-NEXT:    scratch_load_b32 v15, off, s32 offset:60
+; GISEL64-NEXT:    scratch_load_b32 v16, off, s32 offset:64
+; GISEL64-NEXT:    scratch_load_b32 v17, off, s32 offset:68
+; GISEL64-NEXT:    scratch_load_b32 v18, off, s32 offset:72
+; GISEL64-NEXT:    scratch_load_b32 v19, off, s32 offset:76
+; GISEL64-NEXT:    scratch_load_b32 v20, off, s32 offset:80
+; GISEL64-NEXT:    scratch_load_b32 v21, off, s32 offset:84
+; GISEL64-NEXT:    scratch_load_b32 v22, off, s32 offset:88
+; GISEL64-NEXT:    scratch_load_b32 v23, off, s32 offset:92
+; GISEL64-NEXT:    scratch_load_b32 v24, off, s32 offset:96
+; GISEL64-NEXT:    scratch_load_b32 v25, off, s32 offset:100
+; GISEL64-NEXT:    scratch_load_b32 v26, off, s32 offset:104
+; GISEL64-NEXT:    scratch_load_b32 v27, off, s32 offset:108
+; GISEL64-NEXT:    scratch_load_b32 v28, off, s32 offset:112
+; GISEL64-NEXT:    scratch_load_b32 v29, off, s32 offset:116
+; GISEL64-NEXT:    scratch_load_b32 v30, off, s32 offset:120
+; GISEL64-NEXT:    scratch_load_b32 v31, off, s32 offset:124
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; GISEL64-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; GISEL64-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; GISEL64-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; GISEL64-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; GISEL64-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; GISEL64-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; GISEL64-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; GISEL64-NEXT:    scratch_load_b32 v48, off, s33 offset:164
-; GISEL64-NEXT:    scratch_load_b32 v49, off, s33 offset:168
-; GISEL64-NEXT:    scratch_load_b32 v50, off, s33 offset:172
-; GISEL64-NEXT:    scratch_load_b32 v51, off, s33 offset:176
-; GISEL64-NEXT:    scratch_load_b32 v52, off, s33 offset:180
-; GISEL64-NEXT:    scratch_load_b32 v53, off, s33 offset:184
-; GISEL64-NEXT:    scratch_load_b32 v54, off, s33 offset:188
-; GISEL64-NEXT:    scratch_load_b32 v55, off, s33 offset:192
-; GISEL64-NEXT:    scratch_load_b32 v64, off, s33 offset:196
-; GISEL64-NEXT:    scratch_load_b32 v65, off, s33 offset:200
-; GISEL64-NEXT:    scratch_load_b32 v66, off, s33 offset:204
-; GISEL64-NEXT:    scratch_load_b32 v67, off, s33 offset:208
-; GISEL64-NEXT:    scratch_load_b32 v68, off, s33 offset:212
-; GISEL64-NEXT:    scratch_load_b32 v69, off, s33 offset:216
-; GISEL64-NEXT:    scratch_load_b32 v70, off, s33 offset:220
-; GISEL64-NEXT:    scratch_load_b32 v71, off, s33 offset:224
-; GISEL64-NEXT:    scratch_load_b32 v80, off, s33 offset:228
-; GISEL64-NEXT:    scratch_load_b32 v81, off, s33 offset:232
-; GISEL64-NEXT:    scratch_load_b32 v82, off, s33 offset:236
-; GISEL64-NEXT:    scratch_load_b32 v83, off, s33 offset:240
-; GISEL64-NEXT:    scratch_load_b32 v84, off, s33 offset:244
-; GISEL64-NEXT:    scratch_load_b32 v85, off, s33 offset:248
-; GISEL64-NEXT:    scratch_load_b32 v86, off, s33 offset:252
-; GISEL64-NEXT:    scratch_load_b32 v87, off, s33 offset:256
+; GISEL64-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; GISEL64-NEXT:    scratch_load_b32 v33, off, s32 offset:132
+; GISEL64-NEXT:    scratch_load_b32 v34, off, s32 offset:136
+; GISEL64-NEXT:    scratch_load_b32 v35, off, s32 offset:140
+; GISEL64-NEXT:    scratch_load_b32 v36, off, s32 offset:144
+; GISEL64-NEXT:    scratch_load_b32 v37, off, s32 offset:148
+; GISEL64-NEXT:    scratch_load_b32 v38, off, s32 offset:152
+; GISEL64-NEXT:    scratch_load_b32 v39, off, s32 offset:156
+; GISEL64-NEXT:    scratch_load_b32 v48, off, s32 offset:160
+; GISEL64-NEXT:    scratch_load_b32 v49, off, s32 offset:164
+; GISEL64-NEXT:    scratch_load_b32 v50, off, s32 offset:168
+; GISEL64-NEXT:    scratch_load_b32 v51, off, s32 offset:172
+; GISEL64-NEXT:    scratch_load_b32 v52, off, s32 offset:176
+; GISEL64-NEXT:    scratch_load_b32 v53, off, s32 offset:180
+; GISEL64-NEXT:    scratch_load_b32 v54, off, s32 offset:184
+; GISEL64-NEXT:    scratch_load_b32 v55, off, s32 offset:188
+; GISEL64-NEXT:    scratch_load_b32 v64, off, s32 offset:192
+; GISEL64-NEXT:    scratch_load_b32 v65, off, s32 offset:196
+; GISEL64-NEXT:    scratch_load_b32 v66, off, s32 offset:200
+; GISEL64-NEXT:    scratch_load_b32 v67, off, s32 offset:204
+; GISEL64-NEXT:    scratch_load_b32 v68, off, s32 offset:208
+; GISEL64-NEXT:    scratch_load_b32 v69, off, s32 offset:212
+; GISEL64-NEXT:    scratch_load_b32 v70, off, s32 offset:216
+; GISEL64-NEXT:    scratch_load_b32 v71, off, s32 offset:220
+; GISEL64-NEXT:    scratch_load_b32 v80, off, s32 offset:224
+; GISEL64-NEXT:    scratch_load_b32 v81, off, s32 offset:228
+; GISEL64-NEXT:    scratch_load_b32 v82, off, s32 offset:232
+; GISEL64-NEXT:    scratch_load_b32 v83, off, s32 offset:236
+; GISEL64-NEXT:    scratch_load_b32 v84, off, s32 offset:240
+; GISEL64-NEXT:    scratch_load_b32 v85, off, s32 offset:244
+; GISEL64-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; GISEL64-NEXT:    scratch_load_b32 v87, off, s32 offset:252
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v96, off, s33 offset:260
-; GISEL64-NEXT:    scratch_load_b32 v97, off, s33 offset:264
-; GISEL64-NEXT:    scratch_load_b32 v98, off, s33 offset:268
-; GISEL64-NEXT:    scratch_load_b32 v99, off, s33 offset:272
-; GISEL64-NEXT:    scratch_load_b32 v100, off, s33 offset:276
-; GISEL64-NEXT:    scratch_load_b32 v101, off, s33 offset:280
-; GISEL64-NEXT:    scratch_load_b32 v102, off, s33 offset:284
-; GISEL64-NEXT:    scratch_load_b32 v103, off, s33 offset:288
-; GISEL64-NEXT:    scratch_load_b32 v112, off, s33 offset:292
-; GISEL64-NEXT:    scratch_load_b32 v113, off, s33 offset:296
-; GISEL64-NEXT:    scratch_load_b32 v114, off, s33 offset:300
-; GISEL64-NEXT:    scratch_load_b32 v115, off, s33 offset:304
-; GISEL64-NEXT:    scratch_load_b32 v116, off, s33 offset:308
-; GISEL64-NEXT:    scratch_load_b32 v117, off, s33 offset:312
-; GISEL64-NEXT:    scratch_load_b32 v118, off, s33 offset:316
-; GISEL64-NEXT:    scratch_load_b32 v119, off, s33 offset:320
-; GISEL64-NEXT:    scratch_load_b32 v128, off, s33 offset:324
-; GISEL64-NEXT:    scratch_load_b32 v129, off, s33 offset:328
-; GISEL64-NEXT:    scratch_load_b32 v130, off, s33 offset:332
-; GISEL64-NEXT:    scratch_load_b32 v131, off, s33 offset:336
-; GISEL64-NEXT:    scratch_load_b32 v132, off, s33 offset:340
-; GISEL64-NEXT:    scratch_load_b32 v133, off, s33 offset:344
-; GISEL64-NEXT:    scratch_load_b32 v134, off, s33 offset:348
-; GISEL64-NEXT:    scratch_load_b32 v135, off, s33 offset:352
-; GISEL64-NEXT:    scratch_load_b32 v144, off, s33 offset:356
-; GISEL64-NEXT:    scratch_load_b32 v145, off, s33 offset:360
-; GISEL64-NEXT:    scratch_load_b32 v146, off, s33 offset:364
-; GISEL64-NEXT:    scratch_load_b32 v147, off, s33 offset:368
-; GISEL64-NEXT:    scratch_load_b32 v148, off, s33 offset:372
-; GISEL64-NEXT:    scratch_load_b32 v149, off, s33 offset:376
-; GISEL64-NEXT:    scratch_load_b32 v150, off, s33 offset:380
-; GISEL64-NEXT:    scratch_load_b32 v151, off, s33 offset:384
+; GISEL64-NEXT:    scratch_load_b32 v96, off, s32 offset:256
+; GISEL64-NEXT:    scratch_load_b32 v97, off, s32 offset:260
+; GISEL64-NEXT:    scratch_load_b32 v98, off, s32 offset:264
+; GISEL64-NEXT:    scratch_load_b32 v99, off, s32 offset:268
+; GISEL64-NEXT:    scratch_load_b32 v100, off, s32 offset:272
+; GISEL64-NEXT:    scratch_load_b32 v101, off, s32 offset:276
+; GISEL64-NEXT:    scratch_load_b32 v102, off, s32 offset:280
+; GISEL64-NEXT:    scratch_load_b32 v103, off, s32 offset:284
+; GISEL64-NEXT:    scratch_load_b32 v112, off, s32 offset:288
+; GISEL64-NEXT:    scratch_load_b32 v113, off, s32 offset:292
+; GISEL64-NEXT:    scratch_load_b32 v114, off, s32 offset:296
+; GISEL64-NEXT:    scratch_load_b32 v115, off, s32 offset:300
+; GISEL64-NEXT:    scratch_load_b32 v116, off, s32 offset:304
+; GISEL64-NEXT:    scratch_load_b32 v117, off, s32 offset:308
+; GISEL64-NEXT:    scratch_load_b32 v118, off, s32 offset:312
+; GISEL64-NEXT:    scratch_load_b32 v119, off, s32 offset:316
+; GISEL64-NEXT:    scratch_load_b32 v128, off, s32 offset:320
+; GISEL64-NEXT:    scratch_load_b32 v129, off, s32 offset:324
+; GISEL64-NEXT:    scratch_load_b32 v130, off, s32 offset:328
+; GISEL64-NEXT:    scratch_load_b32 v131, off, s32 offset:332
+; GISEL64-NEXT:    scratch_load_b32 v132, off, s32 offset:336
+; GISEL64-NEXT:    scratch_load_b32 v133, off, s32 offset:340
+; GISEL64-NEXT:    scratch_load_b32 v134, off, s32 offset:344
+; GISEL64-NEXT:    scratch_load_b32 v135, off, s32 offset:348
+; GISEL64-NEXT:    scratch_load_b32 v144, off, s32 offset:352
+; GISEL64-NEXT:    scratch_load_b32 v145, off, s32 offset:356
+; GISEL64-NEXT:    scratch_load_b32 v146, off, s32 offset:360
+; GISEL64-NEXT:    scratch_load_b32 v147, off, s32 offset:364
+; GISEL64-NEXT:    scratch_load_b32 v148, off, s32 offset:368
+; GISEL64-NEXT:    scratch_load_b32 v149, off, s32 offset:372
+; GISEL64-NEXT:    scratch_load_b32 v150, off, s32 offset:376
+; GISEL64-NEXT:    scratch_load_b32 v151, off, s32 offset:380
 ; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v160, off, s33 offset:388
-; GISEL64-NEXT:    scratch_load_b32 v161, off, s33 offset:392
-; GISEL64-NEXT:    scratch_load_b32 v162, off, s33 offset:396
-; GISEL64-NEXT:    scratch_load_b32 v163, off, s33 offset:400
-; GISEL64-NEXT:    scratch_load_b32 v164, off, s33 offset:404
-; GISEL64-NEXT:    scratch_load_b32 v165, off, s33 offset:408
-; GISEL64-NEXT:    scratch_load_b32 v166, off, s33 offset:412
-; GISEL64-NEXT:    scratch_load_b32 v167, off, s33 offset:416
-; GISEL64-NEXT:    scratch_load_b32 v176, off, s33 offset:420
-; GISEL64-NEXT:    scratch_load_b32 v177, off, s33 offset:424
-; GISEL64-NEXT:    scratch_load_b32 v178, off, s33 offset:428
-; GISEL64-NEXT:    scratch_load_b32 v179, off, s33 offset:432
-; GISEL64-NEXT:    scratch_load_b32 v180, off, s33 offset:436
-; GISEL64-NEXT:    scratch_load_b32 v181, off, s33 offset:440
-; GISEL64-NEXT:    scratch_load_b32 v182, off, s33 offset:444
-; GISEL64-NEXT:    scratch_load_b32 v183, off, s33 offset:448
-; GISEL64-NEXT:    scratch_load_b32 v192, off, s33 offset:452
-; GISEL64-NEXT:    scratch_load_b32 v193, off, s33 offset:456
-; GISEL64-NEXT:    scratch_load_b32 v194, off, s33 offset:460
-; GISEL64-NEXT:    scratch_load_b32 v195, off, s33 offset:464
-; GISEL64-NEXT:    scratch_load_b32 v196, off, s33 offset:468
-; GISEL64-NEXT:    scratch_load_b32 v197, off, s33 offset:472
-; GISEL64-NEXT:    scratch_load_b32 v198, off, s33 offset:476
-; GISEL64-NEXT:    scratch_load_b32 v199, off, s33 offset:480
-; GISEL64-NEXT:    scratch_load_b32 v208, off, s33 offset:484
-; GISEL64-NEXT:    scratch_load_b32 v209, off, s33 offset:488
-; GISEL64-NEXT:    scratch_load_b32 v210, off, s33 offset:492
-; GISEL64-NEXT:    scratch_load_b32 v211, off, s33 offset:496
-; GISEL64-NEXT:    scratch_load_b32 v212, off, s33 offset:500
-; GISEL64-NEXT:    scratch_load_b32 v213, off, s33 offset:504
-; GISEL64-NEXT:    scratch_load_b32 v214, off, s33 offset:508
-; GISEL64-NEXT:    scratch_load_b32 v215, off, s33 offset:512
+; GISEL64-NEXT:    scratch_load_b32 v160, off, s32 offset:384
+; GISEL64-NEXT:    scratch_load_b32 v161, off, s32 offset:388
+; GISEL64-NEXT:    scratch_load_b32 v162, off, s32 offset:392
+; GISEL64-NEXT:    scratch_load_b32 v163, off, s32 offset:396
+; GISEL64-NEXT:    scratch_load_b32 v164, off, s32 offset:400
+; GISEL64-NEXT:    scratch_load_b32 v165, off, s32 offset:404
+; GISEL64-NEXT:    scratch_load_b32 v166, off, s32 offset:408
+; GISEL64-NEXT:    scratch_load_b32 v167, off, s32 offset:412
+; GISEL64-NEXT:    scratch_load_b32 v176, off, s32 offset:416
+; GISEL64-NEXT:    scratch_load_b32 v177, off, s32 offset:420
+; GISEL64-NEXT:    scratch_load_b32 v178, off, s32 offset:424
+; GISEL64-NEXT:    scratch_load_b32 v179, off, s32 offset:428
+; GISEL64-NEXT:    scratch_load_b32 v180, off, s32 offset:432
+; GISEL64-NEXT:    scratch_load_b32 v181, off, s32 offset:436
+; GISEL64-NEXT:    scratch_load_b32 v182, off, s32 offset:440
+; GISEL64-NEXT:    scratch_load_b32 v183, off, s32 offset:444
+; GISEL64-NEXT:    scratch_load_b32 v192, off, s32 offset:448
+; GISEL64-NEXT:    scratch_load_b32 v193, off, s32 offset:452
+; GISEL64-NEXT:    scratch_load_b32 v194, off, s32 offset:456
+; GISEL64-NEXT:    scratch_load_b32 v195, off, s32 offset:460
+; GISEL64-NEXT:    scratch_load_b32 v196, off, s32 offset:464
+; GISEL64-NEXT:    scratch_load_b32 v197, off, s32 offset:468
+; GISEL64-NEXT:    scratch_load_b32 v198, off, s32 offset:472
+; GISEL64-NEXT:    scratch_load_b32 v199, off, s32 offset:476
+; GISEL64-NEXT:    scratch_load_b32 v208, off, s32 offset:480
+; GISEL64-NEXT:    scratch_load_b32 v209, off, s32 offset:484
+; GISEL64-NEXT:    scratch_load_b32 v210, off, s32 offset:488
+; GISEL64-NEXT:    scratch_load_b32 v211, off, s32 offset:492
+; GISEL64-NEXT:    scratch_load_b32 v212, off, s32 offset:496
+; GISEL64-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; GISEL64-NEXT:    scratch_load_b32 v214, off, s32 offset:504
+; GISEL64-NEXT:    scratch_load_b32 v215, off, s32 offset:508
 ; GISEL64-NEXT:    s_clause 0xf
-; GISEL64-NEXT:    scratch_load_b32 v224, off, s33 offset:516
-; GISEL64-NEXT:    scratch_load_b32 v225, off, s33 offset:520
-; GISEL64-NEXT:    scratch_load_b32 v226, off, s33 offset:524
-; GISEL64-NEXT:    scratch_load_b32 v227, off, s33 offset:528
-; GISEL64-NEXT:    scratch_load_b32 v228, off, s33 offset:532
-; GISEL64-NEXT:    scratch_load_b32 v229, off, s33 offset:536
-; GISEL64-NEXT:    scratch_load_b32 v230, off, s33 offset:540
-; GISEL64-NEXT:    scratch_load_b32 v231, off, s33 offset:544
-; GISEL64-NEXT:    scratch_load_b32 v240, off, s33 offset:548
-; GISEL64-NEXT:    scratch_load_b32 v241, off, s33 offset:552
-; GISEL64-NEXT:    scratch_load_b32 v242, off, s33 offset:556
-; GISEL64-NEXT:    scratch_load_b32 v243, off, s33 offset:560
-; GISEL64-NEXT:    scratch_load_b32 v244, off, s33 offset:564
-; GISEL64-NEXT:    scratch_load_b32 v245, off, s33 offset:568
-; GISEL64-NEXT:    scratch_load_b32 v246, off, s33 offset:572
-; GISEL64-NEXT:    scratch_load_b32 v247, off, s33 offset:576
-; GISEL64-NEXT:    s_mov_b64 exec, s[34:35]
-; GISEL64-NEXT:    s_mov_b32 s33, s36
-; GISEL64-NEXT:    s_wait_loadcnt 0x0
-; GISEL64-NEXT:    s_wait_alu 0xfffe
-; GISEL64-NEXT:    s_setpc_b64 s[30:31]
-  %ret = tail call <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
+; GISEL64-NEXT:    scratch_load_b32 v224, off, s32 offset:512
+; GISEL64-NEXT:    scratch_load_b32 v225, off, s32 offset:516
+; GISEL64-NEXT:    scratch_load_b32 v226, off, s32 offset:520
+; GISEL64-NEXT:    scratch_load_b32 v227, off, s32 offset:524
+; GISEL64-NEXT:    scratch_load_b32 v228, off, s32 offset:528
+; GISEL64-NEXT:    scratch_load_b32 v229, off, s32 offset:532
+; GISEL64-NEXT:    scratch_load_b32 v230, off, s32 offset:536
+; GISEL64-NEXT:    scratch_load_b32 v231, off, s32 offset:540
+; GISEL64-NEXT:    scratch_load_b32 v240, off, s32 offset:544
+; GISEL64-NEXT:    scratch_load_b32 v241, off, s32 offset:548
+; GISEL64-NEXT:    scratch_load_b32 v242, off, s32 offset:552
+; GISEL64-NEXT:    scratch_load_b32 v243, off, s32 offset:556
+; GISEL64-NEXT:    scratch_load_b32 v244, off, s32 offset:560
+; GISEL64-NEXT:    scratch_load_b32 v245, off, s32 offset:564
+; GISEL64-NEXT:    scratch_load_b32 v246, off, s32 offset:568
+; GISEL64-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; GISEL64-NEXT:    s_mov_b64 exec, s[0:1]
+; GISEL64-NEXT:    s_setpc_b64 s[36:37]
+  %ret = tail call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
   ret <2 x half> %ret
 }
 



More information about the llvm-branch-commits mailing list