[llvm-branch-commits] [llvm] 2f23e30 - Revert "[AMDGPU] Intrinsic for launching whole wave functions (#145859)"

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Aug 6 03:22:26 PDT 2025


Author: Diana Picus
Date: 2025-08-06T12:22:23+02:00
New Revision: 2f23e305e1858a5e6629ffdfbfe0e8a696214ff8

URL: https://github.com/llvm/llvm-project/commit/2f23e305e1858a5e6629ffdfbfe0e8a696214ff8
DIFF: https://github.com/llvm/llvm-project/commit/2f23e305e1858a5e6629ffdfbfe0e8a696214ff8.diff

LOG: Revert "[AMDGPU] Intrinsic for launching whole wave functions (#145859)"

This reverts commit 0461cd3d1d6f722b2833dd913c1f974aeebcf82a.

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/IR/Verifier.cpp
    llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
    llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
    llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll
    llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll

Removed: 
    llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
    llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 191ed5f523a74..90cfd8cedd51b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2671,18 +2671,6 @@ def int_amdgcn_cs_chain:
             ],
             [IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;
 
-// Run a function with all the lanes enabled. Only direct calls are allowed. The
-// first argument is the callee, which must have the `amdgpu_gfx_whole_wave`
-// calling convention and must not be variadic. The remaining arguments to the
-// callee are taken from the arguments passed to the intrinsic. Lanes that are
-// inactive at the point of the call will receive poison. The return value is
-// the return value of the callee for the active lanes (there is no return
-// value in the inactive ones).
-def int_amdgcn_call_whole_wave:
-  Intrinsic<[llvm_any_ty],    // The return type of the callee.
-            [llvm_anyptr_ty,  // The callee.
-             llvm_vararg_ty], // The arguments to the callee.
-            [IntrConvergent]>;
 
 //===----------------------------------------------------------------------===//
 // CI+ Intrinsics

diff  --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 787543df1f0f0..bbfae570e1e1a 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2556,7 +2556,6 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                          getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
     return true;
   case Intrinsic::amdgcn_cs_chain:
-  case Intrinsic::amdgcn_call_whole_wave:
     return translateCallBase(CI, MIRBuilder);
   case Intrinsic::fptrunc_round: {
     uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d5b904055e547..d0815e9f51822 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7984,43 +7984,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     HasTailCall = true;
     return;
   }
-  case Intrinsic::amdgcn_call_whole_wave: {
-    TargetLowering::ArgListTy Args;
-
-    // The first argument is the callee. Skip it when assembling the call args.
-    TargetLowering::ArgListEntry Arg;
-    for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
-      Arg.Node = getValue(I.getArgOperand(Idx));
-      Arg.Ty = I.getArgOperand(Idx)->getType();
-      Arg.setAttributes(&I, Idx);
-      Args.push_back(Arg);
-    }
-
-    SDValue ConvControlToken;
-    if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
-      auto *Token = Bundle->Inputs[0].get();
-      ConvControlToken = getValue(Token);
-    }
-
-    TargetLowering::CallLoweringInfo CLI(DAG);
-    CLI.setDebugLoc(getCurSDLoc())
-        .setChain(getRoot())
-        .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
-                   getValue(I.getArgOperand(0)), std::move(Args))
-        .setTailCall(false)
-        .setIsPreallocated(
-            I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
-        .setConvergent(I.isConvergent())
-        .setConvergenceControlToken(ConvControlToken);
-    CLI.CB = &I;
-
-    std::pair<SDValue, SDValue> Result =
-        lowerInvokable(CLI, /*EHPadBB=*/nullptr);
-
-    if (Result.first.getNode())
-      setValue(&I, Result.first);
-    return;
-  }
   case Intrinsic::ptrmask: {
     SDValue Ptr = getValue(I.getOperand(0));
     SDValue Mask = getValue(I.getOperand(1));

diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index f3f0ae5233977..ca3f148f881a4 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6612,36 +6612,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           "Value for inactive lanes must be a VGPR function argument", &Call);
     break;
   }
-  case Intrinsic::amdgcn_call_whole_wave: {
-    auto F = dyn_cast<Function>(Call.getArgOperand(0));
-    Check(F, "Indirect whole wave calls are not allowed", &Call);
-
-    CallingConv::ID CC = F->getCallingConv();
-    Check(CC == CallingConv::AMDGPU_Gfx_WholeWave,
-          "Callee must have the amdgpu_gfx_whole_wave calling convention",
-          &Call);
-
-    Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call);
-
-    Check(Call.arg_size() == F->arg_size(),
-          "Call argument count must match callee argument count", &Call);
-
-    // The first argument of the call is the callee, and the first argument of
-    // the callee is the active mask. The rest of the arguments must match.
-    Check(F->arg_begin()->getType()->isIntegerTy(1),
-          "Callee must have i1 as its first argument", &Call);
-    for (auto [CallArg, FuncArg] :
-         drop_begin(zip_equal(Call.args(), F->args()))) {
-      Check(CallArg->getType() == FuncArg.getType(),
-            "Argument types must match", &Call);
-
-      // Check that inreg attributes match between call site and function
-      Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) ==
-                FuncArg.hasInRegAttr(),
-            "Argument inreg attributes must match", &Call);
-    }
-    break;
-  }
   case Intrinsic::amdgcn_s_prefetch_data: {
     Check(
         AMDGPU::isFlatGlobalAddrSpace(

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 3ff6e22fbb943..3d8d274f06246 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1464,22 +1464,9 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                    CallLoweringInfo &Info) const {
   if (Function *F = Info.CB->getCalledFunction())
     if (F->isIntrinsic()) {
-      switch (F->getIntrinsicID()) {
-      case Intrinsic::amdgcn_cs_chain:
-        return lowerChainCall(MIRBuilder, Info);
-      case Intrinsic::amdgcn_call_whole_wave:
-        Info.CallConv = CallingConv::AMDGPU_Gfx_WholeWave;
-
-        // Get the callee from the original instruction, so it doesn't look like
-        // this is an indirect call.
-        Info.Callee = MachineOperand::CreateGA(
-            cast<GlobalValue>(Info.CB->getOperand(0)), /*Offset=*/0);
-        Info.OrigArgs.erase(Info.OrigArgs.begin());
-        Info.IsVarArg = false;
-        break;
-      default:
-        llvm_unreachable("Unexpected intrinsic call");
-      }
+      assert(F->getIntrinsicID() == Intrinsic::amdgcn_cs_chain &&
+             "Unexpected intrinsic");
+      return lowerChainCall(MIRBuilder, Info);
     }
 
   if (Info.IsVarArg) {

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
deleted file mode 100644
index eac0767c88d80..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-call-whole-wave.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=DAGISEL
-; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 < %s | FileCheck %s --check-prefix=GISEL
-
-declare amdgpu_gfx_whole_wave i32 @good_callee(i1 %active, i32 %x, i32 %y, i32 inreg %c)
-
-define amdgpu_gfx void @basic_test(i32 %x, i32 inreg %c, ptr addrspace(1) %ptr) {
-; DAGISEL-LABEL: basic_test:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL-NEXT:    s_wait_expcnt 0x0
-; DAGISEL-NEXT:    s_wait_samplecnt 0x0
-; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
-; DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL-NEXT:    s_mov_b32 s0, s33
-; DAGISEL-NEXT:    s_mov_b32 s33, s32
-; DAGISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; DAGISEL-NEXT:    scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, s1
-; DAGISEL-NEXT:    v_writelane_b32 v42, s0, 2
-; DAGISEL-NEXT:    s_clause 0x1
-; DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:4
-; DAGISEL-NEXT:    scratch_store_b32 off, v41, s33
-; DAGISEL-NEXT:    v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1
-; DAGISEL-NEXT:    v_add_nc_u32_e32 v1, 13, v0
-; DAGISEL-NEXT:    v_writelane_b32 v42, s30, 0
-; DAGISEL-NEXT:    s_mov_b32 s1, good_callee@abs32@hi
-; DAGISEL-NEXT:    s_mov_b32 s0, good_callee@abs32@lo
-; DAGISEL-NEXT:    s_add_co_i32 s32, s32, 16
-; DAGISEL-NEXT:    v_writelane_b32 v42, s31, 1
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL-NEXT:    global_store_b32 v[40:41], v0, off
-; DAGISEL-NEXT:    s_clause 0x1
-; DAGISEL-NEXT:    scratch_load_b32 v41, off, s33
-; DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 offset:4
-; DAGISEL-NEXT:    v_readlane_b32 s31, v42, 1
-; DAGISEL-NEXT:    v_readlane_b32 s30, v42, 0
-; DAGISEL-NEXT:    s_mov_b32 s32, s33
-; DAGISEL-NEXT:    v_readlane_b32 s0, v42, 2
-; DAGISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; DAGISEL-NEXT:    scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, s1
-; DAGISEL-NEXT:    s_mov_b32 s33, s0
-; DAGISEL-NEXT:    s_wait_loadcnt 0x0
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-LABEL: basic_test:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-NEXT:    s_wait_expcnt 0x0
-; GISEL-NEXT:    s_wait_samplecnt 0x0
-; GISEL-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-NEXT:    s_wait_kmcnt 0x0
-; GISEL-NEXT:    s_mov_b32 s0, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; GISEL-NEXT:    scratch_store_b32 off, v42, s33 offset:8 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_mov_b32 exec_lo, s1
-; GISEL-NEXT:    v_writelane_b32 v42, s0, 2
-; GISEL-NEXT:    s_clause 0x1
-; GISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:4
-; GISEL-NEXT:    scratch_store_b32 off, v41, s33
-; GISEL-NEXT:    v_dual_mov_b32 v40, v1 :: v_dual_mov_b32 v41, v2
-; GISEL-NEXT:    v_add_nc_u32_e32 v1, 13, v0
-; GISEL-NEXT:    v_writelane_b32 v42, s30, 0
-; GISEL-NEXT:    s_mov_b32 s0, good_callee@abs32@lo
-; GISEL-NEXT:    s_mov_b32 s1, good_callee@abs32@hi
-; GISEL-NEXT:    s_add_co_i32 s32, s32, 16
-; GISEL-NEXT:    v_writelane_b32 v42, s31, 1
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL-NEXT:    global_store_b32 v[40:41], v0, off
-; GISEL-NEXT:    s_clause 0x1
-; GISEL-NEXT:    scratch_load_b32 v41, off, s33
-; GISEL-NEXT:    scratch_load_b32 v40, off, s33 offset:4
-; GISEL-NEXT:    v_readlane_b32 s31, v42, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v42, 0
-; GISEL-NEXT:    s_mov_b32 s32, s33
-; GISEL-NEXT:    v_readlane_b32 s0, v42, 2
-; GISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; GISEL-NEXT:    scratch_load_b32 v42, off, s33 offset:8 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_mov_b32 exec_lo, s1
-; GISEL-NEXT:    s_mov_b32 s33, s0
-; GISEL-NEXT:    s_wait_loadcnt 0x0
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-  %y = add i32 %x, 13
-  %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 %y, i32 inreg %c)
-  store i32 %ret, ptr addrspace(1) %ptr
-  ret void
-}
-
-declare amdgpu_gfx_whole_wave void @void_callee(i1 %active, i32 %x)
-
-define amdgpu_gfx void @ret_void(i32 %x) {
-; DAGISEL-LABEL: ret_void:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL-NEXT:    s_wait_expcnt 0x0
-; DAGISEL-NEXT:    s_wait_samplecnt 0x0
-; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
-; DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL-NEXT:    s_mov_b32 s0, s33
-; DAGISEL-NEXT:    s_mov_b32 s33, s32
-; DAGISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, s1
-; DAGISEL-NEXT:    v_writelane_b32 v40, s0, 2
-; DAGISEL-NEXT:    s_mov_b32 s1, void_callee@abs32@hi
-; DAGISEL-NEXT:    s_mov_b32 s0, void_callee@abs32@lo
-; DAGISEL-NEXT:    s_add_co_i32 s32, s32, 16
-; DAGISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; DAGISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; DAGISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; DAGISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; DAGISEL-NEXT:    s_mov_b32 s32, s33
-; DAGISEL-NEXT:    v_readlane_b32 s0, v40, 2
-; DAGISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, s1
-; DAGISEL-NEXT:    s_mov_b32 s33, s0
-; DAGISEL-NEXT:    s_wait_loadcnt 0x0
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-LABEL: ret_void:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-NEXT:    s_wait_expcnt 0x0
-; GISEL-NEXT:    s_wait_samplecnt 0x0
-; GISEL-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-NEXT:    s_wait_kmcnt 0x0
-; GISEL-NEXT:    s_mov_b32 s0, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; GISEL-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_mov_b32 exec_lo, s1
-; GISEL-NEXT:    v_writelane_b32 v40, s0, 2
-; GISEL-NEXT:    s_mov_b32 s0, void_callee@abs32@lo
-; GISEL-NEXT:    s_mov_b32 s1, void_callee@abs32@hi
-; GISEL-NEXT:    s_add_co_i32 s32, s32, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_mov_b32 s32, s33
-; GISEL-NEXT:    v_readlane_b32 s0, v40, 2
-; GISEL-NEXT:    s_or_saveexec_b32 s1, -1
-; GISEL-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_mov_b32 exec_lo, s1
-; GISEL-NEXT:    s_mov_b32 s33, s0
-; GISEL-NEXT:    s_wait_loadcnt 0x0
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-  call void(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @void_callee, i32 %x)
-  ret void
-}
-

diff  --git a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
index 17c8010bcbe05..8fc5afb155573 100644
--- a/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/irtranslator-whole-wave-functions.ll
@@ -101,29 +101,3 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
   %ret = call i64 @llvm.amdgcn.update.dpp.i64(i64 %x, i64 %y, i32 1, i32 1, i32 1, i1 false)
   ret i64 %ret
 }
-
-declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, i32 %x)
-
-; Make sure we don't pass the first argument (i1).
-define amdgpu_cs void @call(i32 %x, ptr %p) {
-  ; CHECK-LABEL: name: call
-  ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
-  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
-  ; CHECK-NEXT:   $vgpr0 = COPY [[COPY]](s32)
-  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV1]](p0), @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   G_STORE [[COPY3]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
-  ; CHECK-NEXT:   S_ENDPGM 0
-  %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, i32 %x) convergent
-  store i32 %ret, ptr %p
-  ret void
-}

diff  --git a/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll
index 69809b115e037..3450d63ff7b4a 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-whole-wave-functions.ll
@@ -189,79 +189,3 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
   ret i64 %ret
 }
 
-declare amdgpu_gfx_whole_wave i32 @callee(i1 %active, <8 x i32> %x)
-
-; Make sure we don't pass the first argument (i1).
-define amdgpu_cs void @call(<8 x i32> %x, ptr %p) {
-  ; DAGISEL-LABEL: name: call
-  ; DAGISEL: bb.0 (%ir-block.0):
-  ; DAGISEL-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
-  ; DAGISEL-NEXT: {{  $}}
-  ; DAGISEL-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr9
-  ; DAGISEL-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
-  ; DAGISEL-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr7
-  ; DAGISEL-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr6
-  ; DAGISEL-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr5
-  ; DAGISEL-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-  ; DAGISEL-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; DAGISEL-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; DAGISEL-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; DAGISEL-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; DAGISEL-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; DAGISEL-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
-  ; DAGISEL-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
-  ; DAGISEL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
-  ; DAGISEL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
-  ; DAGISEL-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, killed [[S_MOV_B32_]], %subreg.sub1
-  ; DAGISEL-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; DAGISEL-NEXT:   $vgpr0 = COPY [[COPY9]]
-  ; DAGISEL-NEXT:   $vgpr1 = COPY [[COPY8]]
-  ; DAGISEL-NEXT:   $vgpr2 = COPY [[COPY7]]
-  ; DAGISEL-NEXT:   $vgpr3 = COPY [[COPY6]]
-  ; DAGISEL-NEXT:   $vgpr4 = COPY [[COPY5]]
-  ; DAGISEL-NEXT:   $vgpr5 = COPY [[COPY4]]
-  ; DAGISEL-NEXT:   $vgpr6 = COPY [[COPY3]]
-  ; DAGISEL-NEXT:   $vgpr7 = COPY [[COPY2]]
-  ; DAGISEL-NEXT:   $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit-def $vgpr0
-  ; DAGISEL-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; DAGISEL-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; DAGISEL-NEXT:   [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
-  ; DAGISEL-NEXT:   FLAT_STORE_DWORD killed [[COPY11]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
-  ; DAGISEL-NEXT:   S_ENDPGM 0
-  ;
-  ; GISEL-LABEL: name: call
-  ; GISEL: bb.1 (%ir-block.0):
-  ; GISEL-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
-  ; GISEL-NEXT: {{  $}}
-  ; GISEL-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GISEL-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GISEL-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-  ; GISEL-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
-  ; GISEL-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-  ; GISEL-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
-  ; GISEL-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
-  ; GISEL-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
-  ; GISEL-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
-  ; GISEL-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
-  ; GISEL-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
-  ; GISEL-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; GISEL-NEXT:   $vgpr0 = COPY [[COPY]]
-  ; GISEL-NEXT:   $vgpr1 = COPY [[COPY1]]
-  ; GISEL-NEXT:   $vgpr2 = COPY [[COPY2]]
-  ; GISEL-NEXT:   $vgpr3 = COPY [[COPY3]]
-  ; GISEL-NEXT:   $vgpr4 = COPY [[COPY4]]
-  ; GISEL-NEXT:   $vgpr5 = COPY [[COPY5]]
-  ; GISEL-NEXT:   $vgpr6 = COPY [[COPY6]]
-  ; GISEL-NEXT:   $vgpr7 = COPY [[COPY7]]
-  ; GISEL-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee
-  ; GISEL-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee
-  ; GISEL-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-  ; GISEL-NEXT:   $sgpr30_sgpr31 = SI_CALL [[REG_SEQUENCE1]], @callee, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit-def $vgpr0
-  ; GISEL-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; GISEL-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
-  ; GISEL-NEXT:   FLAT_STORE_DWORD [[REG_SEQUENCE]], [[COPY10]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
-  ; GISEL-NEXT:   S_ENDPGM 0
-  %ret = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x i32> %x) convergent
-  store i32 %ret, ptr %p
-  ret void
-}

diff  --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index 36e8adb23f1f5..a13a68a665aee 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -2412,1427 +2412,3 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
   %ret = call amdgpu_gfx <2 x half>(<2 x half>, <2 x half>) @gfx_callee(<2 x half> %y, <2 x half> %x) convergent
   ret <2 x half> %ret
 }
-
-declare amdgpu_gfx_whole_wave float @callee(i1 %active, <8 x float> %x)
-
-define amdgpu_cs void @call_from_entry(<8 x float> %x, ptr %p) {
-; DAGISEL-LABEL: call_from_entry:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; DAGISEL-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; DAGISEL-NEXT:    s_mov_b32 s32, 0
-; DAGISEL-NEXT:    v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8
-; DAGISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL-NEXT:    flat_store_b32 v[40:41], v0
-; DAGISEL-NEXT:    s_endpgm
-;
-; GISEL-LABEL: call_from_entry:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; GISEL-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL-NEXT:    flat_store_b32 v[40:41], v0
-; GISEL-NEXT:    s_endpgm
-;
-; DAGISEL64-LABEL: call_from_entry:
-; DAGISEL64:       ; %bb.0:
-; DAGISEL64-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; DAGISEL64-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; DAGISEL64-NEXT:    s_mov_b32 s32, 0
-; DAGISEL64-NEXT:    v_mov_b32_e32 v41, v9
-; DAGISEL64-NEXT:    v_mov_b32_e32 v40, v8
-; DAGISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL64-NEXT:    flat_store_b32 v[40:41], v0
-; DAGISEL64-NEXT:    s_endpgm
-;
-; GISEL64-LABEL: call_from_entry:
-; GISEL64:       ; %bb.0:
-; GISEL64-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; GISEL64-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; GISEL64-NEXT:    s_mov_b32 s32, 0
-; GISEL64-NEXT:    v_mov_b32_e32 v40, v8
-; GISEL64-NEXT:    v_mov_b32_e32 v41, v9
-; GISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL64-NEXT:    flat_store_b32 v[40:41], v0
-; GISEL64-NEXT:    s_endpgm
-  %ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent
-  store float %ret, ptr %p
-  ret void
-}
-
-define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float> %x, ptr %p) {
-; DAGISEL-LABEL: call_from_whole_wave:
-; DAGISEL:       ; %bb.0:
-; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL-NEXT:    s_wait_expcnt 0x0
-; DAGISEL-NEXT:    s_wait_samplecnt 0x0
-; DAGISEL-NEXT:    s_wait_bvhcnt 0x0
-; DAGISEL-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL-NEXT:    s_mov_b32 s0, s33
-; DAGISEL-NEXT:    s_mov_b32 s33, s32
-; DAGISEL-NEXT:    s_xor_saveexec_b32 s4, -1
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:128
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:172
-; DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:176
-; DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:180
-; DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:184
-; DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:188
-; DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:192
-; DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:196
-; DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:200
-; DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:204
-; DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:208
-; DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:212
-; DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:216
-; DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:220
-; DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:224
-; DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:228
-; DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:232
-; DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:236
-; DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:240
-; DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:244
-; DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:248
-; DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:252
-; DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:256
-; DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:260
-; DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:264
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:268
-; DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:272
-; DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:276
-; DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:280
-; DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:284
-; DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:288
-; DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:292
-; DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:296
-; DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:300
-; DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:304
-; DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:308
-; DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:312
-; DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:316
-; DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:320
-; DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:324
-; DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:328
-; DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:332
-; DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:336
-; DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:340
-; DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:344
-; DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:348
-; DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:352
-; DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:356
-; DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:360
-; DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:364
-; DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:368
-; DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:372
-; DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:376
-; DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:380
-; DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:384
-; DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:388
-; DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:392
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:396
-; DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:400
-; DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:404
-; DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:408
-; DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:412
-; DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:416
-; DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:420
-; DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:424
-; DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:428
-; DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:432
-; DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:436
-; DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:440
-; DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:444
-; DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:448
-; DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:452
-; DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:456
-; DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:460
-; DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:464
-; DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:468
-; DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:472
-; DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:476
-; DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:480
-; DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:484
-; DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:488
-; DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:492
-; DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:496
-; DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:500
-; DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:504
-; DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:508
-; DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:512
-; DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:516
-; DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:520
-; DAGISEL-NEXT:    s_clause 0xf
-; DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:524
-; DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:528
-; DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:532
-; DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:536
-; DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:540
-; DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:544
-; DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:548
-; DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:552
-; DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:556
-; DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:560
-; DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:564
-; DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:568
-; DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:572
-; DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:576
-; DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:580
-; DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:584
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
-; DAGISEL-NEXT:    s_clause 0x2
-; DAGISEL-NEXT:    scratch_store_b32 off, v42, s33
-; DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:164
-; DAGISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:168
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    v_writelane_b32 v42, s0, 3
-; DAGISEL-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; DAGISEL-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; DAGISEL-NEXT:    s_addk_co_i32 s32, 0x250
-; DAGISEL-NEXT:    v_dual_mov_b32 v41, v9 :: v_dual_mov_b32 v40, v8
-; DAGISEL-NEXT:    v_writelane_b32 v42, s4, 0
-; DAGISEL-NEXT:    v_writelane_b32 v42, s30, 1
-; DAGISEL-NEXT:    v_writelane_b32 v42, s31, 2
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL-NEXT:    flat_store_b32 v[40:41], v0
-; DAGISEL-NEXT:    v_readlane_b32 s31, v42, 2
-; DAGISEL-NEXT:    v_readlane_b32 s30, v42, 1
-; DAGISEL-NEXT:    v_readlane_b32 s4, v42, 0
-; DAGISEL-NEXT:    v_readlane_b32 s0, v42, 3
-; DAGISEL-NEXT:    s_clause 0x2
-; DAGISEL-NEXT:    scratch_load_b32 v42, off, s33
-; DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 offset:164
-; DAGISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:168
-; DAGISEL-NEXT:    s_mov_b32 s32, s33
-; DAGISEL-NEXT:    s_xor_b32 exec_lo, s4, -1
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:128
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:172
-; DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:176
-; DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:180
-; DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:184
-; DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:188
-; DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:192
-; DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:196
-; DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:200
-; DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:204
-; DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:208
-; DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:212
-; DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:216
-; DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:220
-; DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:224
-; DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:228
-; DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:232
-; DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:236
-; DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:240
-; DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:244
-; DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:248
-; DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:252
-; DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:256
-; DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:260
-; DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:264
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:268
-; DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:272
-; DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:276
-; DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:280
-; DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:284
-; DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:288
-; DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:292
-; DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:296
-; DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:300
-; DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:304
-; DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:308
-; DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:312
-; DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:316
-; DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:320
-; DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:324
-; DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:328
-; DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:332
-; DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:336
-; DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:340
-; DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:344
-; DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:348
-; DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:352
-; DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:356
-; DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:360
-; DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:364
-; DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:368
-; DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:372
-; DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:376
-; DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:380
-; DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:384
-; DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:388
-; DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:392
-; DAGISEL-NEXT:    s_clause 0x1f
-; DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:396
-; DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:400
-; DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:404
-; DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:408
-; DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:412
-; DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:416
-; DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:420
-; DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:424
-; DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:428
-; DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:432
-; DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:436
-; DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:440
-; DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:444
-; DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:448
-; DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:452
-; DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:456
-; DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:460
-; DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:464
-; DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:468
-; DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:472
-; DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:476
-; DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:480
-; DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:484
-; DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:488
-; DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:492
-; DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:496
-; DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:500
-; DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:504
-; DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:508
-; DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:512
-; DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:516
-; DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:520
-; DAGISEL-NEXT:    s_clause 0xf
-; DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:524
-; DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:528
-; DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:532
-; DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:536
-; DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:540
-; DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:544
-; DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:548
-; DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:552
-; DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:556
-; DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:560
-; DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:564
-; DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:568
-; DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:572
-; DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:576
-; DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:580
-; DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:584
-; DAGISEL-NEXT:    s_mov_b32 exec_lo, s4
-; DAGISEL-NEXT:    s_mov_b32 s33, s0
-; DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL-NEXT:    s_wait_alu 0xfffe
-; DAGISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL-LABEL: call_from_whole_wave:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-NEXT:    s_wait_expcnt 0x0
-; GISEL-NEXT:    s_wait_samplecnt 0x0
-; GISEL-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-NEXT:    s_wait_kmcnt 0x0
-; GISEL-NEXT:    s_mov_b32 s0, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_xor_saveexec_b32 s4, -1
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; GISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; GISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; GISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; GISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; GISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; GISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; GISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; GISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; GISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; GISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; GISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; GISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; GISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; GISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; GISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; GISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; GISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; GISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; GISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; GISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; GISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; GISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; GISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; GISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; GISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; GISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; GISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; GISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; GISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; GISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; GISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:128
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; GISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; GISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; GISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; GISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; GISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; GISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; GISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; GISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:172
-; GISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:176
-; GISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:180
-; GISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:184
-; GISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:188
-; GISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:192
-; GISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:196
-; GISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:200
-; GISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:204
-; GISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:208
-; GISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:212
-; GISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:216
-; GISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:220
-; GISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:224
-; GISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:228
-; GISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:232
-; GISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:236
-; GISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:240
-; GISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:244
-; GISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:248
-; GISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:252
-; GISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:256
-; GISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:260
-; GISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:264
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:268
-; GISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:272
-; GISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:276
-; GISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:280
-; GISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:284
-; GISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:288
-; GISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:292
-; GISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:296
-; GISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:300
-; GISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:304
-; GISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:308
-; GISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:312
-; GISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:316
-; GISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:320
-; GISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:324
-; GISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:328
-; GISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:332
-; GISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:336
-; GISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:340
-; GISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:344
-; GISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:348
-; GISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:352
-; GISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:356
-; GISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:360
-; GISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:364
-; GISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:368
-; GISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:372
-; GISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:376
-; GISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:380
-; GISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:384
-; GISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:388
-; GISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:392
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:396
-; GISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:400
-; GISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:404
-; GISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:408
-; GISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:412
-; GISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:416
-; GISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:420
-; GISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:424
-; GISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:428
-; GISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:432
-; GISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:436
-; GISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:440
-; GISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:444
-; GISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:448
-; GISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:452
-; GISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:456
-; GISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:460
-; GISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:464
-; GISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:468
-; GISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:472
-; GISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:476
-; GISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:480
-; GISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:484
-; GISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:488
-; GISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:492
-; GISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:496
-; GISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:500
-; GISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:504
-; GISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:508
-; GISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:512
-; GISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:516
-; GISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:520
-; GISEL-NEXT:    s_clause 0xf
-; GISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:524
-; GISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:528
-; GISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:532
-; GISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:536
-; GISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:540
-; GISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:544
-; GISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:548
-; GISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:552
-; GISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:556
-; GISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:560
-; GISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:564
-; GISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:568
-; GISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:572
-; GISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:576
-; GISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:580
-; GISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:584
-; GISEL-NEXT:    s_mov_b32 exec_lo, -1
-; GISEL-NEXT:    s_clause 0x2
-; GISEL-NEXT:    scratch_store_b32 off, v42, s33
-; GISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:164
-; GISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:168
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    v_writelane_b32 v42, s0, 3
-; GISEL-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; GISEL-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; GISEL-NEXT:    s_addk_co_i32 s32, 0x250
-; GISEL-NEXT:    v_dual_mov_b32 v40, v8 :: v_dual_mov_b32 v41, v9
-; GISEL-NEXT:    v_writelane_b32 v42, s4, 0
-; GISEL-NEXT:    v_writelane_b32 v42, s30, 1
-; GISEL-NEXT:    v_writelane_b32 v42, s31, 2
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL-NEXT:    flat_store_b32 v[40:41], v0
-; GISEL-NEXT:    v_readlane_b32 s31, v42, 2
-; GISEL-NEXT:    v_readlane_b32 s30, v42, 1
-; GISEL-NEXT:    v_readlane_b32 s4, v42, 0
-; GISEL-NEXT:    v_readlane_b32 s0, v42, 3
-; GISEL-NEXT:    s_clause 0x2
-; GISEL-NEXT:    scratch_load_b32 v42, off, s33
-; GISEL-NEXT:    scratch_load_b32 v40, off, s33 offset:164
-; GISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:168
-; GISEL-NEXT:    s_mov_b32 s32, s33
-; GISEL-NEXT:    s_xor_b32 exec_lo, s4, -1
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; GISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; GISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; GISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; GISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; GISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; GISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; GISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; GISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; GISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; GISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; GISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; GISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; GISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; GISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; GISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; GISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; GISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; GISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; GISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; GISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; GISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; GISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; GISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; GISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; GISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; GISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; GISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; GISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; GISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; GISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; GISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:128
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; GISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; GISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; GISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; GISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; GISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; GISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; GISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; GISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:172
-; GISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:176
-; GISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:180
-; GISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:184
-; GISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:188
-; GISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:192
-; GISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:196
-; GISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:200
-; GISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:204
-; GISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:208
-; GISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:212
-; GISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:216
-; GISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:220
-; GISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:224
-; GISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:228
-; GISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:232
-; GISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:236
-; GISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:240
-; GISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:244
-; GISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:248
-; GISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:252
-; GISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:256
-; GISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:260
-; GISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:264
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:268
-; GISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:272
-; GISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:276
-; GISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:280
-; GISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:284
-; GISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:288
-; GISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:292
-; GISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:296
-; GISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:300
-; GISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:304
-; GISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:308
-; GISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:312
-; GISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:316
-; GISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:320
-; GISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:324
-; GISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:328
-; GISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:332
-; GISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:336
-; GISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:340
-; GISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:344
-; GISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:348
-; GISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:352
-; GISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:356
-; GISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:360
-; GISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:364
-; GISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:368
-; GISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:372
-; GISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:376
-; GISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:380
-; GISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:384
-; GISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:388
-; GISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:392
-; GISEL-NEXT:    s_clause 0x1f
-; GISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:396
-; GISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:400
-; GISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:404
-; GISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:408
-; GISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:412
-; GISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:416
-; GISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:420
-; GISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:424
-; GISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:428
-; GISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:432
-; GISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:436
-; GISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:440
-; GISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:444
-; GISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:448
-; GISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:452
-; GISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:456
-; GISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:460
-; GISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:464
-; GISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:468
-; GISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:472
-; GISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:476
-; GISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:480
-; GISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:484
-; GISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:488
-; GISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:492
-; GISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:496
-; GISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:500
-; GISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:504
-; GISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:508
-; GISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:512
-; GISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:516
-; GISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:520
-; GISEL-NEXT:    s_clause 0xf
-; GISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:524
-; GISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:528
-; GISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:532
-; GISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:536
-; GISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:540
-; GISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:544
-; GISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:548
-; GISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:552
-; GISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:556
-; GISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:560
-; GISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:564
-; GISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:568
-; GISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:572
-; GISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:576
-; GISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:580
-; GISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:584
-; GISEL-NEXT:    s_mov_b32 exec_lo, s4
-; GISEL-NEXT:    s_mov_b32 s33, s0
-; GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-NEXT:    s_wait_alu 0xfffe
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; DAGISEL64-LABEL: call_from_whole_wave:
-; DAGISEL64:       ; %bb.0:
-; DAGISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL64-NEXT:    s_wait_expcnt 0x0
-; DAGISEL64-NEXT:    s_wait_samplecnt 0x0
-; DAGISEL64-NEXT:    s_wait_bvhcnt 0x0
-; DAGISEL64-NEXT:    s_wait_kmcnt 0x0
-; DAGISEL64-NEXT:    s_mov_b32 s0, s33
-; DAGISEL64-NEXT:    s_mov_b32 s33, s32
-; DAGISEL64-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; DAGISEL64-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; DAGISEL64-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; DAGISEL64-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; DAGISEL64-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; DAGISEL64-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; DAGISEL64-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; DAGISEL64-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; DAGISEL64-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; DAGISEL64-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; DAGISEL64-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; DAGISEL64-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; DAGISEL64-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; DAGISEL64-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; DAGISEL64-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; DAGISEL64-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; DAGISEL64-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; DAGISEL64-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; DAGISEL64-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; DAGISEL64-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; DAGISEL64-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; DAGISEL64-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; DAGISEL64-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; DAGISEL64-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; DAGISEL64-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; DAGISEL64-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; DAGISEL64-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; DAGISEL64-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; DAGISEL64-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; DAGISEL64-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; DAGISEL64-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; DAGISEL64-NEXT:    scratch_store_b32 off, v31, s33 offset:128
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; DAGISEL64-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; DAGISEL64-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; DAGISEL64-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; DAGISEL64-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; DAGISEL64-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; DAGISEL64-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; DAGISEL64-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; DAGISEL64-NEXT:    scratch_store_b32 off, v48, s33 offset:172
-; DAGISEL64-NEXT:    scratch_store_b32 off, v49, s33 offset:176
-; DAGISEL64-NEXT:    scratch_store_b32 off, v50, s33 offset:180
-; DAGISEL64-NEXT:    scratch_store_b32 off, v51, s33 offset:184
-; DAGISEL64-NEXT:    scratch_store_b32 off, v52, s33 offset:188
-; DAGISEL64-NEXT:    scratch_store_b32 off, v53, s33 offset:192
-; DAGISEL64-NEXT:    scratch_store_b32 off, v54, s33 offset:196
-; DAGISEL64-NEXT:    scratch_store_b32 off, v55, s33 offset:200
-; DAGISEL64-NEXT:    scratch_store_b32 off, v64, s33 offset:204
-; DAGISEL64-NEXT:    scratch_store_b32 off, v65, s33 offset:208
-; DAGISEL64-NEXT:    scratch_store_b32 off, v66, s33 offset:212
-; DAGISEL64-NEXT:    scratch_store_b32 off, v67, s33 offset:216
-; DAGISEL64-NEXT:    scratch_store_b32 off, v68, s33 offset:220
-; DAGISEL64-NEXT:    scratch_store_b32 off, v69, s33 offset:224
-; DAGISEL64-NEXT:    scratch_store_b32 off, v70, s33 offset:228
-; DAGISEL64-NEXT:    scratch_store_b32 off, v71, s33 offset:232
-; DAGISEL64-NEXT:    scratch_store_b32 off, v80, s33 offset:236
-; DAGISEL64-NEXT:    scratch_store_b32 off, v81, s33 offset:240
-; DAGISEL64-NEXT:    scratch_store_b32 off, v82, s33 offset:244
-; DAGISEL64-NEXT:    scratch_store_b32 off, v83, s33 offset:248
-; DAGISEL64-NEXT:    scratch_store_b32 off, v84, s33 offset:252
-; DAGISEL64-NEXT:    scratch_store_b32 off, v85, s33 offset:256
-; DAGISEL64-NEXT:    scratch_store_b32 off, v86, s33 offset:260
-; DAGISEL64-NEXT:    scratch_store_b32 off, v87, s33 offset:264
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v96, s33 offset:268
-; DAGISEL64-NEXT:    scratch_store_b32 off, v97, s33 offset:272
-; DAGISEL64-NEXT:    scratch_store_b32 off, v98, s33 offset:276
-; DAGISEL64-NEXT:    scratch_store_b32 off, v99, s33 offset:280
-; DAGISEL64-NEXT:    scratch_store_b32 off, v100, s33 offset:284
-; DAGISEL64-NEXT:    scratch_store_b32 off, v101, s33 offset:288
-; DAGISEL64-NEXT:    scratch_store_b32 off, v102, s33 offset:292
-; DAGISEL64-NEXT:    scratch_store_b32 off, v103, s33 offset:296
-; DAGISEL64-NEXT:    scratch_store_b32 off, v112, s33 offset:300
-; DAGISEL64-NEXT:    scratch_store_b32 off, v113, s33 offset:304
-; DAGISEL64-NEXT:    scratch_store_b32 off, v114, s33 offset:308
-; DAGISEL64-NEXT:    scratch_store_b32 off, v115, s33 offset:312
-; DAGISEL64-NEXT:    scratch_store_b32 off, v116, s33 offset:316
-; DAGISEL64-NEXT:    scratch_store_b32 off, v117, s33 offset:320
-; DAGISEL64-NEXT:    scratch_store_b32 off, v118, s33 offset:324
-; DAGISEL64-NEXT:    scratch_store_b32 off, v119, s33 offset:328
-; DAGISEL64-NEXT:    scratch_store_b32 off, v128, s33 offset:332
-; DAGISEL64-NEXT:    scratch_store_b32 off, v129, s33 offset:336
-; DAGISEL64-NEXT:    scratch_store_b32 off, v130, s33 offset:340
-; DAGISEL64-NEXT:    scratch_store_b32 off, v131, s33 offset:344
-; DAGISEL64-NEXT:    scratch_store_b32 off, v132, s33 offset:348
-; DAGISEL64-NEXT:    scratch_store_b32 off, v133, s33 offset:352
-; DAGISEL64-NEXT:    scratch_store_b32 off, v134, s33 offset:356
-; DAGISEL64-NEXT:    scratch_store_b32 off, v135, s33 offset:360
-; DAGISEL64-NEXT:    scratch_store_b32 off, v144, s33 offset:364
-; DAGISEL64-NEXT:    scratch_store_b32 off, v145, s33 offset:368
-; DAGISEL64-NEXT:    scratch_store_b32 off, v146, s33 offset:372
-; DAGISEL64-NEXT:    scratch_store_b32 off, v147, s33 offset:376
-; DAGISEL64-NEXT:    scratch_store_b32 off, v148, s33 offset:380
-; DAGISEL64-NEXT:    scratch_store_b32 off, v149, s33 offset:384
-; DAGISEL64-NEXT:    scratch_store_b32 off, v150, s33 offset:388
-; DAGISEL64-NEXT:    scratch_store_b32 off, v151, s33 offset:392
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_store_b32 off, v160, s33 offset:396
-; DAGISEL64-NEXT:    scratch_store_b32 off, v161, s33 offset:400
-; DAGISEL64-NEXT:    scratch_store_b32 off, v162, s33 offset:404
-; DAGISEL64-NEXT:    scratch_store_b32 off, v163, s33 offset:408
-; DAGISEL64-NEXT:    scratch_store_b32 off, v164, s33 offset:412
-; DAGISEL64-NEXT:    scratch_store_b32 off, v165, s33 offset:416
-; DAGISEL64-NEXT:    scratch_store_b32 off, v166, s33 offset:420
-; DAGISEL64-NEXT:    scratch_store_b32 off, v167, s33 offset:424
-; DAGISEL64-NEXT:    scratch_store_b32 off, v176, s33 offset:428
-; DAGISEL64-NEXT:    scratch_store_b32 off, v177, s33 offset:432
-; DAGISEL64-NEXT:    scratch_store_b32 off, v178, s33 offset:436
-; DAGISEL64-NEXT:    scratch_store_b32 off, v179, s33 offset:440
-; DAGISEL64-NEXT:    scratch_store_b32 off, v180, s33 offset:444
-; DAGISEL64-NEXT:    scratch_store_b32 off, v181, s33 offset:448
-; DAGISEL64-NEXT:    scratch_store_b32 off, v182, s33 offset:452
-; DAGISEL64-NEXT:    scratch_store_b32 off, v183, s33 offset:456
-; DAGISEL64-NEXT:    scratch_store_b32 off, v192, s33 offset:460
-; DAGISEL64-NEXT:    scratch_store_b32 off, v193, s33 offset:464
-; DAGISEL64-NEXT:    scratch_store_b32 off, v194, s33 offset:468
-; DAGISEL64-NEXT:    scratch_store_b32 off, v195, s33 offset:472
-; DAGISEL64-NEXT:    scratch_store_b32 off, v196, s33 offset:476
-; DAGISEL64-NEXT:    scratch_store_b32 off, v197, s33 offset:480
-; DAGISEL64-NEXT:    scratch_store_b32 off, v198, s33 offset:484
-; DAGISEL64-NEXT:    scratch_store_b32 off, v199, s33 offset:488
-; DAGISEL64-NEXT:    scratch_store_b32 off, v208, s33 offset:492
-; DAGISEL64-NEXT:    scratch_store_b32 off, v209, s33 offset:496
-; DAGISEL64-NEXT:    scratch_store_b32 off, v210, s33 offset:500
-; DAGISEL64-NEXT:    scratch_store_b32 off, v211, s33 offset:504
-; DAGISEL64-NEXT:    scratch_store_b32 off, v212, s33 offset:508
-; DAGISEL64-NEXT:    scratch_store_b32 off, v213, s33 offset:512
-; DAGISEL64-NEXT:    scratch_store_b32 off, v214, s33 offset:516
-; DAGISEL64-NEXT:    scratch_store_b32 off, v215, s33 offset:520
-; DAGISEL64-NEXT:    s_clause 0xf
-; DAGISEL64-NEXT:    scratch_store_b32 off, v224, s33 offset:524
-; DAGISEL64-NEXT:    scratch_store_b32 off, v225, s33 offset:528
-; DAGISEL64-NEXT:    scratch_store_b32 off, v226, s33 offset:532
-; DAGISEL64-NEXT:    scratch_store_b32 off, v227, s33 offset:536
-; DAGISEL64-NEXT:    scratch_store_b32 off, v228, s33 offset:540
-; DAGISEL64-NEXT:    scratch_store_b32 off, v229, s33 offset:544
-; DAGISEL64-NEXT:    scratch_store_b32 off, v230, s33 offset:548
-; DAGISEL64-NEXT:    scratch_store_b32 off, v231, s33 offset:552
-; DAGISEL64-NEXT:    scratch_store_b32 off, v240, s33 offset:556
-; DAGISEL64-NEXT:    scratch_store_b32 off, v241, s33 offset:560
-; DAGISEL64-NEXT:    scratch_store_b32 off, v242, s33 offset:564
-; DAGISEL64-NEXT:    scratch_store_b32 off, v243, s33 offset:568
-; DAGISEL64-NEXT:    scratch_store_b32 off, v244, s33 offset:572
-; DAGISEL64-NEXT:    scratch_store_b32 off, v245, s33 offset:576
-; DAGISEL64-NEXT:    scratch_store_b32 off, v246, s33 offset:580
-; DAGISEL64-NEXT:    scratch_store_b32 off, v247, s33 offset:584
-; DAGISEL64-NEXT:    s_mov_b64 exec, -1
-; DAGISEL64-NEXT:    s_clause 0x2
-; DAGISEL64-NEXT:    scratch_store_b32 off, v42, s33
-; DAGISEL64-NEXT:    scratch_store_b32 off, v40, s33 offset:164
-; DAGISEL64-NEXT:    scratch_store_b32 off, v41, s33 offset:168
-; DAGISEL64-NEXT:    s_wait_alu 0xfffe
-; DAGISEL64-NEXT:    v_writelane_b32 v42, s0, 4
-; DAGISEL64-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; DAGISEL64-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; DAGISEL64-NEXT:    s_addk_co_i32 s32, 0x250
-; DAGISEL64-NEXT:    v_mov_b32_e32 v41, v9
-; DAGISEL64-NEXT:    v_writelane_b32 v42, s4, 0
-; DAGISEL64-NEXT:    v_mov_b32_e32 v40, v8
-; DAGISEL64-NEXT:    v_writelane_b32 v42, s5, 1
-; DAGISEL64-NEXT:    v_writelane_b32 v42, s30, 2
-; DAGISEL64-NEXT:    v_writelane_b32 v42, s31, 3
-; DAGISEL64-NEXT:    s_wait_alu 0xfffe
-; DAGISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; DAGISEL64-NEXT:    flat_store_b32 v[40:41], v0
-; DAGISEL64-NEXT:    v_readlane_b32 s31, v42, 3
-; DAGISEL64-NEXT:    v_readlane_b32 s30, v42, 2
-; DAGISEL64-NEXT:    v_readlane_b32 s5, v42, 1
-; DAGISEL64-NEXT:    v_readlane_b32 s4, v42, 0
-; DAGISEL64-NEXT:    v_readlane_b32 s0, v42, 4
-; DAGISEL64-NEXT:    s_clause 0x2
-; DAGISEL64-NEXT:    scratch_load_b32 v42, off, s33
-; DAGISEL64-NEXT:    scratch_load_b32 v40, off, s33 offset:164
-; DAGISEL64-NEXT:    scratch_load_b32 v41, off, s33 offset:168
-; DAGISEL64-NEXT:    s_mov_b32 s32, s33
-; DAGISEL64-NEXT:    s_xor_b64 exec, s[4:5], -1
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; DAGISEL64-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; DAGISEL64-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; DAGISEL64-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; DAGISEL64-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; DAGISEL64-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; DAGISEL64-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; DAGISEL64-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; DAGISEL64-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; DAGISEL64-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; DAGISEL64-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; DAGISEL64-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; DAGISEL64-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; DAGISEL64-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; DAGISEL64-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; DAGISEL64-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; DAGISEL64-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; DAGISEL64-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; DAGISEL64-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; DAGISEL64-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; DAGISEL64-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; DAGISEL64-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; DAGISEL64-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; DAGISEL64-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; DAGISEL64-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; DAGISEL64-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; DAGISEL64-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; DAGISEL64-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; DAGISEL64-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; DAGISEL64-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; DAGISEL64-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; DAGISEL64-NEXT:    scratch_load_b32 v31, off, s33 offset:128
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; DAGISEL64-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; DAGISEL64-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; DAGISEL64-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; DAGISEL64-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; DAGISEL64-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; DAGISEL64-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; DAGISEL64-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; DAGISEL64-NEXT:    scratch_load_b32 v48, off, s33 offset:172
-; DAGISEL64-NEXT:    scratch_load_b32 v49, off, s33 offset:176
-; DAGISEL64-NEXT:    scratch_load_b32 v50, off, s33 offset:180
-; DAGISEL64-NEXT:    scratch_load_b32 v51, off, s33 offset:184
-; DAGISEL64-NEXT:    scratch_load_b32 v52, off, s33 offset:188
-; DAGISEL64-NEXT:    scratch_load_b32 v53, off, s33 offset:192
-; DAGISEL64-NEXT:    scratch_load_b32 v54, off, s33 offset:196
-; DAGISEL64-NEXT:    scratch_load_b32 v55, off, s33 offset:200
-; DAGISEL64-NEXT:    scratch_load_b32 v64, off, s33 offset:204
-; DAGISEL64-NEXT:    scratch_load_b32 v65, off, s33 offset:208
-; DAGISEL64-NEXT:    scratch_load_b32 v66, off, s33 offset:212
-; DAGISEL64-NEXT:    scratch_load_b32 v67, off, s33 offset:216
-; DAGISEL64-NEXT:    scratch_load_b32 v68, off, s33 offset:220
-; DAGISEL64-NEXT:    scratch_load_b32 v69, off, s33 offset:224
-; DAGISEL64-NEXT:    scratch_load_b32 v70, off, s33 offset:228
-; DAGISEL64-NEXT:    scratch_load_b32 v71, off, s33 offset:232
-; DAGISEL64-NEXT:    scratch_load_b32 v80, off, s33 offset:236
-; DAGISEL64-NEXT:    scratch_load_b32 v81, off, s33 offset:240
-; DAGISEL64-NEXT:    scratch_load_b32 v82, off, s33 offset:244
-; DAGISEL64-NEXT:    scratch_load_b32 v83, off, s33 offset:248
-; DAGISEL64-NEXT:    scratch_load_b32 v84, off, s33 offset:252
-; DAGISEL64-NEXT:    scratch_load_b32 v85, off, s33 offset:256
-; DAGISEL64-NEXT:    scratch_load_b32 v86, off, s33 offset:260
-; DAGISEL64-NEXT:    scratch_load_b32 v87, off, s33 offset:264
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v96, off, s33 offset:268
-; DAGISEL64-NEXT:    scratch_load_b32 v97, off, s33 offset:272
-; DAGISEL64-NEXT:    scratch_load_b32 v98, off, s33 offset:276
-; DAGISEL64-NEXT:    scratch_load_b32 v99, off, s33 offset:280
-; DAGISEL64-NEXT:    scratch_load_b32 v100, off, s33 offset:284
-; DAGISEL64-NEXT:    scratch_load_b32 v101, off, s33 offset:288
-; DAGISEL64-NEXT:    scratch_load_b32 v102, off, s33 offset:292
-; DAGISEL64-NEXT:    scratch_load_b32 v103, off, s33 offset:296
-; DAGISEL64-NEXT:    scratch_load_b32 v112, off, s33 offset:300
-; DAGISEL64-NEXT:    scratch_load_b32 v113, off, s33 offset:304
-; DAGISEL64-NEXT:    scratch_load_b32 v114, off, s33 offset:308
-; DAGISEL64-NEXT:    scratch_load_b32 v115, off, s33 offset:312
-; DAGISEL64-NEXT:    scratch_load_b32 v116, off, s33 offset:316
-; DAGISEL64-NEXT:    scratch_load_b32 v117, off, s33 offset:320
-; DAGISEL64-NEXT:    scratch_load_b32 v118, off, s33 offset:324
-; DAGISEL64-NEXT:    scratch_load_b32 v119, off, s33 offset:328
-; DAGISEL64-NEXT:    scratch_load_b32 v128, off, s33 offset:332
-; DAGISEL64-NEXT:    scratch_load_b32 v129, off, s33 offset:336
-; DAGISEL64-NEXT:    scratch_load_b32 v130, off, s33 offset:340
-; DAGISEL64-NEXT:    scratch_load_b32 v131, off, s33 offset:344
-; DAGISEL64-NEXT:    scratch_load_b32 v132, off, s33 offset:348
-; DAGISEL64-NEXT:    scratch_load_b32 v133, off, s33 offset:352
-; DAGISEL64-NEXT:    scratch_load_b32 v134, off, s33 offset:356
-; DAGISEL64-NEXT:    scratch_load_b32 v135, off, s33 offset:360
-; DAGISEL64-NEXT:    scratch_load_b32 v144, off, s33 offset:364
-; DAGISEL64-NEXT:    scratch_load_b32 v145, off, s33 offset:368
-; DAGISEL64-NEXT:    scratch_load_b32 v146, off, s33 offset:372
-; DAGISEL64-NEXT:    scratch_load_b32 v147, off, s33 offset:376
-; DAGISEL64-NEXT:    scratch_load_b32 v148, off, s33 offset:380
-; DAGISEL64-NEXT:    scratch_load_b32 v149, off, s33 offset:384
-; DAGISEL64-NEXT:    scratch_load_b32 v150, off, s33 offset:388
-; DAGISEL64-NEXT:    scratch_load_b32 v151, off, s33 offset:392
-; DAGISEL64-NEXT:    s_clause 0x1f
-; DAGISEL64-NEXT:    scratch_load_b32 v160, off, s33 offset:396
-; DAGISEL64-NEXT:    scratch_load_b32 v161, off, s33 offset:400
-; DAGISEL64-NEXT:    scratch_load_b32 v162, off, s33 offset:404
-; DAGISEL64-NEXT:    scratch_load_b32 v163, off, s33 offset:408
-; DAGISEL64-NEXT:    scratch_load_b32 v164, off, s33 offset:412
-; DAGISEL64-NEXT:    scratch_load_b32 v165, off, s33 offset:416
-; DAGISEL64-NEXT:    scratch_load_b32 v166, off, s33 offset:420
-; DAGISEL64-NEXT:    scratch_load_b32 v167, off, s33 offset:424
-; DAGISEL64-NEXT:    scratch_load_b32 v176, off, s33 offset:428
-; DAGISEL64-NEXT:    scratch_load_b32 v177, off, s33 offset:432
-; DAGISEL64-NEXT:    scratch_load_b32 v178, off, s33 offset:436
-; DAGISEL64-NEXT:    scratch_load_b32 v179, off, s33 offset:440
-; DAGISEL64-NEXT:    scratch_load_b32 v180, off, s33 offset:444
-; DAGISEL64-NEXT:    scratch_load_b32 v181, off, s33 offset:448
-; DAGISEL64-NEXT:    scratch_load_b32 v182, off, s33 offset:452
-; DAGISEL64-NEXT:    scratch_load_b32 v183, off, s33 offset:456
-; DAGISEL64-NEXT:    scratch_load_b32 v192, off, s33 offset:460
-; DAGISEL64-NEXT:    scratch_load_b32 v193, off, s33 offset:464
-; DAGISEL64-NEXT:    scratch_load_b32 v194, off, s33 offset:468
-; DAGISEL64-NEXT:    scratch_load_b32 v195, off, s33 offset:472
-; DAGISEL64-NEXT:    scratch_load_b32 v196, off, s33 offset:476
-; DAGISEL64-NEXT:    scratch_load_b32 v197, off, s33 offset:480
-; DAGISEL64-NEXT:    scratch_load_b32 v198, off, s33 offset:484
-; DAGISEL64-NEXT:    scratch_load_b32 v199, off, s33 offset:488
-; DAGISEL64-NEXT:    scratch_load_b32 v208, off, s33 offset:492
-; DAGISEL64-NEXT:    scratch_load_b32 v209, off, s33 offset:496
-; DAGISEL64-NEXT:    scratch_load_b32 v210, off, s33 offset:500
-; DAGISEL64-NEXT:    scratch_load_b32 v211, off, s33 offset:504
-; DAGISEL64-NEXT:    scratch_load_b32 v212, off, s33 offset:508
-; DAGISEL64-NEXT:    scratch_load_b32 v213, off, s33 offset:512
-; DAGISEL64-NEXT:    scratch_load_b32 v214, off, s33 offset:516
-; DAGISEL64-NEXT:    scratch_load_b32 v215, off, s33 offset:520
-; DAGISEL64-NEXT:    s_clause 0xf
-; DAGISEL64-NEXT:    scratch_load_b32 v224, off, s33 offset:524
-; DAGISEL64-NEXT:    scratch_load_b32 v225, off, s33 offset:528
-; DAGISEL64-NEXT:    scratch_load_b32 v226, off, s33 offset:532
-; DAGISEL64-NEXT:    scratch_load_b32 v227, off, s33 offset:536
-; DAGISEL64-NEXT:    scratch_load_b32 v228, off, s33 offset:540
-; DAGISEL64-NEXT:    scratch_load_b32 v229, off, s33 offset:544
-; DAGISEL64-NEXT:    scratch_load_b32 v230, off, s33 offset:548
-; DAGISEL64-NEXT:    scratch_load_b32 v231, off, s33 offset:552
-; DAGISEL64-NEXT:    scratch_load_b32 v240, off, s33 offset:556
-; DAGISEL64-NEXT:    scratch_load_b32 v241, off, s33 offset:560
-; DAGISEL64-NEXT:    scratch_load_b32 v242, off, s33 offset:564
-; DAGISEL64-NEXT:    scratch_load_b32 v243, off, s33 offset:568
-; DAGISEL64-NEXT:    scratch_load_b32 v244, off, s33 offset:572
-; DAGISEL64-NEXT:    scratch_load_b32 v245, off, s33 offset:576
-; DAGISEL64-NEXT:    scratch_load_b32 v246, off, s33 offset:580
-; DAGISEL64-NEXT:    scratch_load_b32 v247, off, s33 offset:584
-; DAGISEL64-NEXT:    s_mov_b64 exec, s[4:5]
-; DAGISEL64-NEXT:    s_mov_b32 s33, s0
-; DAGISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
-; DAGISEL64-NEXT:    s_wait_alu 0xfffe
-; DAGISEL64-NEXT:    s_setpc_b64 s[30:31]
-;
-; GISEL64-LABEL: call_from_whole_wave:
-; GISEL64:       ; %bb.0:
-; GISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL64-NEXT:    s_wait_expcnt 0x0
-; GISEL64-NEXT:    s_wait_samplecnt 0x0
-; GISEL64-NEXT:    s_wait_bvhcnt 0x0
-; GISEL64-NEXT:    s_wait_kmcnt 0x0
-; GISEL64-NEXT:    s_mov_b32 s0, s33
-; GISEL64-NEXT:    s_mov_b32 s33, s32
-; GISEL64-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; GISEL64-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; GISEL64-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; GISEL64-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; GISEL64-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; GISEL64-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; GISEL64-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; GISEL64-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; GISEL64-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; GISEL64-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; GISEL64-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; GISEL64-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; GISEL64-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; GISEL64-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; GISEL64-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; GISEL64-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; GISEL64-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; GISEL64-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; GISEL64-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; GISEL64-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; GISEL64-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; GISEL64-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; GISEL64-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; GISEL64-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; GISEL64-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; GISEL64-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; GISEL64-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; GISEL64-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; GISEL64-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; GISEL64-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; GISEL64-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; GISEL64-NEXT:    scratch_store_b32 off, v31, s33 offset:128
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; GISEL64-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; GISEL64-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; GISEL64-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; GISEL64-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; GISEL64-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; GISEL64-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; GISEL64-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; GISEL64-NEXT:    scratch_store_b32 off, v48, s33 offset:172
-; GISEL64-NEXT:    scratch_store_b32 off, v49, s33 offset:176
-; GISEL64-NEXT:    scratch_store_b32 off, v50, s33 offset:180
-; GISEL64-NEXT:    scratch_store_b32 off, v51, s33 offset:184
-; GISEL64-NEXT:    scratch_store_b32 off, v52, s33 offset:188
-; GISEL64-NEXT:    scratch_store_b32 off, v53, s33 offset:192
-; GISEL64-NEXT:    scratch_store_b32 off, v54, s33 offset:196
-; GISEL64-NEXT:    scratch_store_b32 off, v55, s33 offset:200
-; GISEL64-NEXT:    scratch_store_b32 off, v64, s33 offset:204
-; GISEL64-NEXT:    scratch_store_b32 off, v65, s33 offset:208
-; GISEL64-NEXT:    scratch_store_b32 off, v66, s33 offset:212
-; GISEL64-NEXT:    scratch_store_b32 off, v67, s33 offset:216
-; GISEL64-NEXT:    scratch_store_b32 off, v68, s33 offset:220
-; GISEL64-NEXT:    scratch_store_b32 off, v69, s33 offset:224
-; GISEL64-NEXT:    scratch_store_b32 off, v70, s33 offset:228
-; GISEL64-NEXT:    scratch_store_b32 off, v71, s33 offset:232
-; GISEL64-NEXT:    scratch_store_b32 off, v80, s33 offset:236
-; GISEL64-NEXT:    scratch_store_b32 off, v81, s33 offset:240
-; GISEL64-NEXT:    scratch_store_b32 off, v82, s33 offset:244
-; GISEL64-NEXT:    scratch_store_b32 off, v83, s33 offset:248
-; GISEL64-NEXT:    scratch_store_b32 off, v84, s33 offset:252
-; GISEL64-NEXT:    scratch_store_b32 off, v85, s33 offset:256
-; GISEL64-NEXT:    scratch_store_b32 off, v86, s33 offset:260
-; GISEL64-NEXT:    scratch_store_b32 off, v87, s33 offset:264
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v96, s33 offset:268
-; GISEL64-NEXT:    scratch_store_b32 off, v97, s33 offset:272
-; GISEL64-NEXT:    scratch_store_b32 off, v98, s33 offset:276
-; GISEL64-NEXT:    scratch_store_b32 off, v99, s33 offset:280
-; GISEL64-NEXT:    scratch_store_b32 off, v100, s33 offset:284
-; GISEL64-NEXT:    scratch_store_b32 off, v101, s33 offset:288
-; GISEL64-NEXT:    scratch_store_b32 off, v102, s33 offset:292
-; GISEL64-NEXT:    scratch_store_b32 off, v103, s33 offset:296
-; GISEL64-NEXT:    scratch_store_b32 off, v112, s33 offset:300
-; GISEL64-NEXT:    scratch_store_b32 off, v113, s33 offset:304
-; GISEL64-NEXT:    scratch_store_b32 off, v114, s33 offset:308
-; GISEL64-NEXT:    scratch_store_b32 off, v115, s33 offset:312
-; GISEL64-NEXT:    scratch_store_b32 off, v116, s33 offset:316
-; GISEL64-NEXT:    scratch_store_b32 off, v117, s33 offset:320
-; GISEL64-NEXT:    scratch_store_b32 off, v118, s33 offset:324
-; GISEL64-NEXT:    scratch_store_b32 off, v119, s33 offset:328
-; GISEL64-NEXT:    scratch_store_b32 off, v128, s33 offset:332
-; GISEL64-NEXT:    scratch_store_b32 off, v129, s33 offset:336
-; GISEL64-NEXT:    scratch_store_b32 off, v130, s33 offset:340
-; GISEL64-NEXT:    scratch_store_b32 off, v131, s33 offset:344
-; GISEL64-NEXT:    scratch_store_b32 off, v132, s33 offset:348
-; GISEL64-NEXT:    scratch_store_b32 off, v133, s33 offset:352
-; GISEL64-NEXT:    scratch_store_b32 off, v134, s33 offset:356
-; GISEL64-NEXT:    scratch_store_b32 off, v135, s33 offset:360
-; GISEL64-NEXT:    scratch_store_b32 off, v144, s33 offset:364
-; GISEL64-NEXT:    scratch_store_b32 off, v145, s33 offset:368
-; GISEL64-NEXT:    scratch_store_b32 off, v146, s33 offset:372
-; GISEL64-NEXT:    scratch_store_b32 off, v147, s33 offset:376
-; GISEL64-NEXT:    scratch_store_b32 off, v148, s33 offset:380
-; GISEL64-NEXT:    scratch_store_b32 off, v149, s33 offset:384
-; GISEL64-NEXT:    scratch_store_b32 off, v150, s33 offset:388
-; GISEL64-NEXT:    scratch_store_b32 off, v151, s33 offset:392
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_store_b32 off, v160, s33 offset:396
-; GISEL64-NEXT:    scratch_store_b32 off, v161, s33 offset:400
-; GISEL64-NEXT:    scratch_store_b32 off, v162, s33 offset:404
-; GISEL64-NEXT:    scratch_store_b32 off, v163, s33 offset:408
-; GISEL64-NEXT:    scratch_store_b32 off, v164, s33 offset:412
-; GISEL64-NEXT:    scratch_store_b32 off, v165, s33 offset:416
-; GISEL64-NEXT:    scratch_store_b32 off, v166, s33 offset:420
-; GISEL64-NEXT:    scratch_store_b32 off, v167, s33 offset:424
-; GISEL64-NEXT:    scratch_store_b32 off, v176, s33 offset:428
-; GISEL64-NEXT:    scratch_store_b32 off, v177, s33 offset:432
-; GISEL64-NEXT:    scratch_store_b32 off, v178, s33 offset:436
-; GISEL64-NEXT:    scratch_store_b32 off, v179, s33 offset:440
-; GISEL64-NEXT:    scratch_store_b32 off, v180, s33 offset:444
-; GISEL64-NEXT:    scratch_store_b32 off, v181, s33 offset:448
-; GISEL64-NEXT:    scratch_store_b32 off, v182, s33 offset:452
-; GISEL64-NEXT:    scratch_store_b32 off, v183, s33 offset:456
-; GISEL64-NEXT:    scratch_store_b32 off, v192, s33 offset:460
-; GISEL64-NEXT:    scratch_store_b32 off, v193, s33 offset:464
-; GISEL64-NEXT:    scratch_store_b32 off, v194, s33 offset:468
-; GISEL64-NEXT:    scratch_store_b32 off, v195, s33 offset:472
-; GISEL64-NEXT:    scratch_store_b32 off, v196, s33 offset:476
-; GISEL64-NEXT:    scratch_store_b32 off, v197, s33 offset:480
-; GISEL64-NEXT:    scratch_store_b32 off, v198, s33 offset:484
-; GISEL64-NEXT:    scratch_store_b32 off, v199, s33 offset:488
-; GISEL64-NEXT:    scratch_store_b32 off, v208, s33 offset:492
-; GISEL64-NEXT:    scratch_store_b32 off, v209, s33 offset:496
-; GISEL64-NEXT:    scratch_store_b32 off, v210, s33 offset:500
-; GISEL64-NEXT:    scratch_store_b32 off, v211, s33 offset:504
-; GISEL64-NEXT:    scratch_store_b32 off, v212, s33 offset:508
-; GISEL64-NEXT:    scratch_store_b32 off, v213, s33 offset:512
-; GISEL64-NEXT:    scratch_store_b32 off, v214, s33 offset:516
-; GISEL64-NEXT:    scratch_store_b32 off, v215, s33 offset:520
-; GISEL64-NEXT:    s_clause 0xf
-; GISEL64-NEXT:    scratch_store_b32 off, v224, s33 offset:524
-; GISEL64-NEXT:    scratch_store_b32 off, v225, s33 offset:528
-; GISEL64-NEXT:    scratch_store_b32 off, v226, s33 offset:532
-; GISEL64-NEXT:    scratch_store_b32 off, v227, s33 offset:536
-; GISEL64-NEXT:    scratch_store_b32 off, v228, s33 offset:540
-; GISEL64-NEXT:    scratch_store_b32 off, v229, s33 offset:544
-; GISEL64-NEXT:    scratch_store_b32 off, v230, s33 offset:548
-; GISEL64-NEXT:    scratch_store_b32 off, v231, s33 offset:552
-; GISEL64-NEXT:    scratch_store_b32 off, v240, s33 offset:556
-; GISEL64-NEXT:    scratch_store_b32 off, v241, s33 offset:560
-; GISEL64-NEXT:    scratch_store_b32 off, v242, s33 offset:564
-; GISEL64-NEXT:    scratch_store_b32 off, v243, s33 offset:568
-; GISEL64-NEXT:    scratch_store_b32 off, v244, s33 offset:572
-; GISEL64-NEXT:    scratch_store_b32 off, v245, s33 offset:576
-; GISEL64-NEXT:    scratch_store_b32 off, v246, s33 offset:580
-; GISEL64-NEXT:    scratch_store_b32 off, v247, s33 offset:584
-; GISEL64-NEXT:    s_mov_b64 exec, -1
-; GISEL64-NEXT:    s_clause 0x2
-; GISEL64-NEXT:    scratch_store_b32 off, v42, s33
-; GISEL64-NEXT:    scratch_store_b32 off, v40, s33 offset:164
-; GISEL64-NEXT:    scratch_store_b32 off, v41, s33 offset:168
-; GISEL64-NEXT:    s_wait_alu 0xfffe
-; GISEL64-NEXT:    v_writelane_b32 v42, s0, 4
-; GISEL64-NEXT:    s_mov_b32 s0, callee@abs32@lo
-; GISEL64-NEXT:    s_mov_b32 s1, callee@abs32@hi
-; GISEL64-NEXT:    s_addk_co_i32 s32, 0x250
-; GISEL64-NEXT:    v_mov_b32_e32 v40, v8
-; GISEL64-NEXT:    v_writelane_b32 v42, s4, 0
-; GISEL64-NEXT:    v_mov_b32_e32 v41, v9
-; GISEL64-NEXT:    v_writelane_b32 v42, s5, 1
-; GISEL64-NEXT:    v_writelane_b32 v42, s30, 2
-; GISEL64-NEXT:    v_writelane_b32 v42, s31, 3
-; GISEL64-NEXT:    s_wait_alu 0xfffe
-; GISEL64-NEXT:    s_swappc_b64 s[30:31], s[0:1]
-; GISEL64-NEXT:    flat_store_b32 v[40:41], v0
-; GISEL64-NEXT:    v_readlane_b32 s31, v42, 3
-; GISEL64-NEXT:    v_readlane_b32 s30, v42, 2
-; GISEL64-NEXT:    v_readlane_b32 s5, v42, 1
-; GISEL64-NEXT:    v_readlane_b32 s4, v42, 0
-; GISEL64-NEXT:    v_readlane_b32 s0, v42, 4
-; GISEL64-NEXT:    s_clause 0x2
-; GISEL64-NEXT:    scratch_load_b32 v42, off, s33
-; GISEL64-NEXT:    scratch_load_b32 v40, off, s33 offset:164
-; GISEL64-NEXT:    scratch_load_b32 v41, off, s33 offset:168
-; GISEL64-NEXT:    s_mov_b32 s32, s33
-; GISEL64-NEXT:    s_xor_b64 exec, s[4:5], -1
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; GISEL64-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; GISEL64-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; GISEL64-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; GISEL64-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; GISEL64-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; GISEL64-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; GISEL64-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; GISEL64-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; GISEL64-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; GISEL64-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; GISEL64-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; GISEL64-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; GISEL64-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; GISEL64-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; GISEL64-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; GISEL64-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; GISEL64-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; GISEL64-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; GISEL64-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; GISEL64-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; GISEL64-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; GISEL64-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; GISEL64-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; GISEL64-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; GISEL64-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; GISEL64-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; GISEL64-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; GISEL64-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; GISEL64-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; GISEL64-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; GISEL64-NEXT:    scratch_load_b32 v31, off, s33 offset:128
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; GISEL64-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; GISEL64-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; GISEL64-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; GISEL64-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; GISEL64-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; GISEL64-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; GISEL64-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; GISEL64-NEXT:    scratch_load_b32 v48, off, s33 offset:172
-; GISEL64-NEXT:    scratch_load_b32 v49, off, s33 offset:176
-; GISEL64-NEXT:    scratch_load_b32 v50, off, s33 offset:180
-; GISEL64-NEXT:    scratch_load_b32 v51, off, s33 offset:184
-; GISEL64-NEXT:    scratch_load_b32 v52, off, s33 offset:188
-; GISEL64-NEXT:    scratch_load_b32 v53, off, s33 offset:192
-; GISEL64-NEXT:    scratch_load_b32 v54, off, s33 offset:196
-; GISEL64-NEXT:    scratch_load_b32 v55, off, s33 offset:200
-; GISEL64-NEXT:    scratch_load_b32 v64, off, s33 offset:204
-; GISEL64-NEXT:    scratch_load_b32 v65, off, s33 offset:208
-; GISEL64-NEXT:    scratch_load_b32 v66, off, s33 offset:212
-; GISEL64-NEXT:    scratch_load_b32 v67, off, s33 offset:216
-; GISEL64-NEXT:    scratch_load_b32 v68, off, s33 offset:220
-; GISEL64-NEXT:    scratch_load_b32 v69, off, s33 offset:224
-; GISEL64-NEXT:    scratch_load_b32 v70, off, s33 offset:228
-; GISEL64-NEXT:    scratch_load_b32 v71, off, s33 offset:232
-; GISEL64-NEXT:    scratch_load_b32 v80, off, s33 offset:236
-; GISEL64-NEXT:    scratch_load_b32 v81, off, s33 offset:240
-; GISEL64-NEXT:    scratch_load_b32 v82, off, s33 offset:244
-; GISEL64-NEXT:    scratch_load_b32 v83, off, s33 offset:248
-; GISEL64-NEXT:    scratch_load_b32 v84, off, s33 offset:252
-; GISEL64-NEXT:    scratch_load_b32 v85, off, s33 offset:256
-; GISEL64-NEXT:    scratch_load_b32 v86, off, s33 offset:260
-; GISEL64-NEXT:    scratch_load_b32 v87, off, s33 offset:264
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v96, off, s33 offset:268
-; GISEL64-NEXT:    scratch_load_b32 v97, off, s33 offset:272
-; GISEL64-NEXT:    scratch_load_b32 v98, off, s33 offset:276
-; GISEL64-NEXT:    scratch_load_b32 v99, off, s33 offset:280
-; GISEL64-NEXT:    scratch_load_b32 v100, off, s33 offset:284
-; GISEL64-NEXT:    scratch_load_b32 v101, off, s33 offset:288
-; GISEL64-NEXT:    scratch_load_b32 v102, off, s33 offset:292
-; GISEL64-NEXT:    scratch_load_b32 v103, off, s33 offset:296
-; GISEL64-NEXT:    scratch_load_b32 v112, off, s33 offset:300
-; GISEL64-NEXT:    scratch_load_b32 v113, off, s33 offset:304
-; GISEL64-NEXT:    scratch_load_b32 v114, off, s33 offset:308
-; GISEL64-NEXT:    scratch_load_b32 v115, off, s33 offset:312
-; GISEL64-NEXT:    scratch_load_b32 v116, off, s33 offset:316
-; GISEL64-NEXT:    scratch_load_b32 v117, off, s33 offset:320
-; GISEL64-NEXT:    scratch_load_b32 v118, off, s33 offset:324
-; GISEL64-NEXT:    scratch_load_b32 v119, off, s33 offset:328
-; GISEL64-NEXT:    scratch_load_b32 v128, off, s33 offset:332
-; GISEL64-NEXT:    scratch_load_b32 v129, off, s33 offset:336
-; GISEL64-NEXT:    scratch_load_b32 v130, off, s33 offset:340
-; GISEL64-NEXT:    scratch_load_b32 v131, off, s33 offset:344
-; GISEL64-NEXT:    scratch_load_b32 v132, off, s33 offset:348
-; GISEL64-NEXT:    scratch_load_b32 v133, off, s33 offset:352
-; GISEL64-NEXT:    scratch_load_b32 v134, off, s33 offset:356
-; GISEL64-NEXT:    scratch_load_b32 v135, off, s33 offset:360
-; GISEL64-NEXT:    scratch_load_b32 v144, off, s33 offset:364
-; GISEL64-NEXT:    scratch_load_b32 v145, off, s33 offset:368
-; GISEL64-NEXT:    scratch_load_b32 v146, off, s33 offset:372
-; GISEL64-NEXT:    scratch_load_b32 v147, off, s33 offset:376
-; GISEL64-NEXT:    scratch_load_b32 v148, off, s33 offset:380
-; GISEL64-NEXT:    scratch_load_b32 v149, off, s33 offset:384
-; GISEL64-NEXT:    scratch_load_b32 v150, off, s33 offset:388
-; GISEL64-NEXT:    scratch_load_b32 v151, off, s33 offset:392
-; GISEL64-NEXT:    s_clause 0x1f
-; GISEL64-NEXT:    scratch_load_b32 v160, off, s33 offset:396
-; GISEL64-NEXT:    scratch_load_b32 v161, off, s33 offset:400
-; GISEL64-NEXT:    scratch_load_b32 v162, off, s33 offset:404
-; GISEL64-NEXT:    scratch_load_b32 v163, off, s33 offset:408
-; GISEL64-NEXT:    scratch_load_b32 v164, off, s33 offset:412
-; GISEL64-NEXT:    scratch_load_b32 v165, off, s33 offset:416
-; GISEL64-NEXT:    scratch_load_b32 v166, off, s33 offset:420
-; GISEL64-NEXT:    scratch_load_b32 v167, off, s33 offset:424
-; GISEL64-NEXT:    scratch_load_b32 v176, off, s33 offset:428
-; GISEL64-NEXT:    scratch_load_b32 v177, off, s33 offset:432
-; GISEL64-NEXT:    scratch_load_b32 v178, off, s33 offset:436
-; GISEL64-NEXT:    scratch_load_b32 v179, off, s33 offset:440
-; GISEL64-NEXT:    scratch_load_b32 v180, off, s33 offset:444
-; GISEL64-NEXT:    scratch_load_b32 v181, off, s33 offset:448
-; GISEL64-NEXT:    scratch_load_b32 v182, off, s33 offset:452
-; GISEL64-NEXT:    scratch_load_b32 v183, off, s33 offset:456
-; GISEL64-NEXT:    scratch_load_b32 v192, off, s33 offset:460
-; GISEL64-NEXT:    scratch_load_b32 v193, off, s33 offset:464
-; GISEL64-NEXT:    scratch_load_b32 v194, off, s33 offset:468
-; GISEL64-NEXT:    scratch_load_b32 v195, off, s33 offset:472
-; GISEL64-NEXT:    scratch_load_b32 v196, off, s33 offset:476
-; GISEL64-NEXT:    scratch_load_b32 v197, off, s33 offset:480
-; GISEL64-NEXT:    scratch_load_b32 v198, off, s33 offset:484
-; GISEL64-NEXT:    scratch_load_b32 v199, off, s33 offset:488
-; GISEL64-NEXT:    scratch_load_b32 v208, off, s33 offset:492
-; GISEL64-NEXT:    scratch_load_b32 v209, off, s33 offset:496
-; GISEL64-NEXT:    scratch_load_b32 v210, off, s33 offset:500
-; GISEL64-NEXT:    scratch_load_b32 v211, off, s33 offset:504
-; GISEL64-NEXT:    scratch_load_b32 v212, off, s33 offset:508
-; GISEL64-NEXT:    scratch_load_b32 v213, off, s33 offset:512
-; GISEL64-NEXT:    scratch_load_b32 v214, off, s33 offset:516
-; GISEL64-NEXT:    scratch_load_b32 v215, off, s33 offset:520
-; GISEL64-NEXT:    s_clause 0xf
-; GISEL64-NEXT:    scratch_load_b32 v224, off, s33 offset:524
-; GISEL64-NEXT:    scratch_load_b32 v225, off, s33 offset:528
-; GISEL64-NEXT:    scratch_load_b32 v226, off, s33 offset:532
-; GISEL64-NEXT:    scratch_load_b32 v227, off, s33 offset:536
-; GISEL64-NEXT:    scratch_load_b32 v228, off, s33 offset:540
-; GISEL64-NEXT:    scratch_load_b32 v229, off, s33 offset:544
-; GISEL64-NEXT:    scratch_load_b32 v230, off, s33 offset:548
-; GISEL64-NEXT:    scratch_load_b32 v231, off, s33 offset:552
-; GISEL64-NEXT:    scratch_load_b32 v240, off, s33 offset:556
-; GISEL64-NEXT:    scratch_load_b32 v241, off, s33 offset:560
-; GISEL64-NEXT:    scratch_load_b32 v242, off, s33 offset:564
-; GISEL64-NEXT:    scratch_load_b32 v243, off, s33 offset:568
-; GISEL64-NEXT:    scratch_load_b32 v244, off, s33 offset:572
-; GISEL64-NEXT:    scratch_load_b32 v245, off, s33 offset:576
-; GISEL64-NEXT:    scratch_load_b32 v246, off, s33 offset:580
-; GISEL64-NEXT:    scratch_load_b32 v247, off, s33 offset:584
-; GISEL64-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL64-NEXT:    s_mov_b32 s33, s0
-; GISEL64-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL64-NEXT:    s_wait_alu 0xfffe
-; GISEL64-NEXT:    s_setpc_b64 s[30:31]
-  %ret = call float(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @callee, <8 x float> %x) convergent
-  store float %ret, ptr %p
-  ret void
-}

diff  --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll
deleted file mode 100644
index a744bf318be9a..0000000000000
--- a/llvm/test/Verifier/AMDGPU/intrinsic-amdgcn-call-whole-wave.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s
-
-define amdgpu_cs void @indirect(ptr %fn, i32 %x) {
-  ; CHECK: Indirect whole wave calls are not allowed
-  %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr %fn, i32 %x)
-  ret void
-}
-
-declare amdgpu_gfx_whole_wave void @variadic_callee(i1 %active, i32 %x, ...)
-
-define amdgpu_cs void @variadic(ptr %fn, i32 %x) {
-  ; CHECK: Variadic whole wave calls are not allowed
-  %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @variadic_callee, i32 %x)
-  ret void
-}
-
-declare amdgpu_gfx void @bad_cc_callee(i1 %active, i32 %x)
-
-define amdgpu_cs void @bad_cc(i32 %x) {
-  ; CHECK: Callee must have the amdgpu_gfx_whole_wave calling convention
-  %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @bad_cc_callee, i32 %x)
-  ret void
-}
-
-declare amdgpu_gfx_whole_wave i32 @no_i1_callee(i32 %active, i32 %y, i32 %z)
-
-define amdgpu_cs void @no_i1(i32 %x) {
-  ; CHECK: Callee must have i1 as its first argument
-  %whatever = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @no_i1_callee, i32 %x, i32 0)
-  ret void
-}
-
-declare amdgpu_gfx_whole_wave i32 @good_callee(i1 %active, i32 %x, i32 inreg %y)
-
-define amdgpu_cs void @bad_args(i32 %x) {
-  ; CHECK: Call argument count must match callee argument count
-  %whatever.0 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x)
-
-  ; CHECK: Argument types must match
-  %whatever.1 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i64 inreg 0)
-
-  ; CHECK: Argument inreg attributes must match
-  %whatever.2 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @good_callee, i32 %x, i32 0)
-
-  ret void
-}


        


More information about the llvm-branch-commits mailing list