[llvm] ac0f64f - [AMDGPU] Split vgpr regalloc pipeline (#93526)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 30 07:25:52 PDT 2024
Author: Christudasan Devadasan
Date: 2024-09-30T19:55:42+05:30
New Revision: ac0f64f06d67a93817ccd9a3c529ad40920115c9
URL: https://github.com/llvm/llvm-project/commit/ac0f64f06d67a93817ccd9a3c529ad40920115c9
DIFF: https://github.com/llvm/llvm-project/commit/ac0f64f06d67a93817ccd9a3c529ad40920115c9.diff
LOG: [AMDGPU] Split vgpr regalloc pipeline (#93526)
Allocating wwm-registers and per-thread VGPR operands
together imposes many challenges in the way the
registers are reused during allocation. There are
times when regalloc reuses the registers of regular
VGPR operations for wwm-operations in a small range,
unintentionally clobbering their inactive lanes and
causing correctness issues that are hard to trace.
This patch splits the VGPR allocation pipeline further
to allocate wwm-registers first and the regular VGPR
operands in a separate pipeline. The splitting would
ensure that the physical registers used for wwm
allocations won't take part in the next allocation
pipeline to avoid any such clobbering.
Added:
llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
Modified:
llvm/include/llvm/CodeGen/MachineRegisterInfo.h
llvm/lib/Target/AMDGPU/AMDGPU.h
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
llvm/test/CodeGen/AMDGPU/div_i128.ll
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir
llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
llvm/test/CodeGen/AMDGPU/frame-index.mir
llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
llvm/test/CodeGen/AMDGPU/pr51516.mir
llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
llvm/test/CodeGen/AMDGPU/rem_i128.ll
llvm/test/CodeGen/AMDGPU/remat-vop.mir
llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
llvm/test/CodeGen/AMDGPU/spill192.mir
llvm/test/CodeGen/AMDGPU/spill224.mir
llvm/test/CodeGen/AMDGPU/spill288.mir
llvm/test/CodeGen/AMDGPU/spill320.mir
llvm/test/CodeGen/AMDGPU/spill352.mir
llvm/test/CodeGen/AMDGPU/spill384.mir
llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
llvm/test/CodeGen/AMDGPU/trap-abis.ll
llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
Removed:
llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 2367d8d04787d9..7a2c23c13a3ce6 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -184,6 +184,8 @@ class MachineRegisterInfo {
TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg);
}
+ const MachineFunction &getMF() const { return *MF; }
+
//===--------------------------------------------------------------------===//
// Function State
//===--------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 4abb5a63ab6d2c..342d55e828bca5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -57,6 +57,7 @@ FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
+FunctionPass *createAMDGPUReserveWWMRegsPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
ModulePass *
createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
@@ -154,6 +155,9 @@ struct AMDGPULowerBufferFatPointersPass
const TargetMachine &TM;
};
+void initializeAMDGPUReserveWWMRegsPass(PassRegistry &);
+extern char &AMDGPUReserveWWMRegsID;
+
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
new file mode 100644
index 00000000000000..7dc492a8f7adfd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp
@@ -0,0 +1,96 @@
+//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to reserved regs list -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass should be invoked at the end of wwm-regalloc pipeline.
+/// It identifies the WWM regs allocated during this pipeline and add
+/// them to the list of reserved registers so that they won't be available for
+/// per-thread VGPR allocation in the subsequent regalloc pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-reserve-wwm-regs"
+
+namespace {
+
+class AMDGPUReserveWWMRegs : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AMDGPUReserveWWMRegs() : MachineFunctionPass(ID) {
+ initializeAMDGPUReserveWWMRegsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "AMDGPU Reserve WWM Registers";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(AMDGPUReserveWWMRegs, DEBUG_TYPE,
+ "AMDGPU Reserve WWM Registers", false, false)
+
+char AMDGPUReserveWWMRegs::ID = 0;
+
+char &llvm::AMDGPUReserveWWMRegsID = AMDGPUReserveWWMRegs::ID;
+
+bool AMDGPUReserveWWMRegs::runOnMachineFunction(MachineFunction &MF) {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+ if (Opc != AMDGPU::SI_SPILL_S32_TO_VGPR &&
+ Opc != AMDGPU::SI_RESTORE_S32_FROM_VGPR)
+ continue;
+
+ Register Reg = Opc == AMDGPU::SI_SPILL_S32_TO_VGPR
+ ? MI.getOperand(0).getReg()
+ : MI.getOperand(1).getReg();
+
+ assert(Reg.isPhysical() &&
+ "All WWM registers should have been allocated by now.");
+
+ MFI->reserveWWMRegister(Reg);
+ Changed |= true;
+ }
+ }
+
+ // The renamable flag can't be set for reserved registers. Reset the flag for
+ // MOs involving wwm-regs as they will be reserved during vgpr-regalloc
+ // pipeline.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (Register Reg : MFI->getWWMReservedRegs()) {
+ for (MachineOperand &MO : MRI.reg_operands(Reg))
+ MO.setIsRenamable(false);
+ }
+
+ // Now clear the NonWWMRegMask earlier set during wwm-regalloc.
+ MFI->clearNonWWMRegAllocMask();
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ce3d70332d0a67..1f2148c2922de9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
: RegisterRegAllocBase(N, D, C) {}
};
+class WWMRegisterRegAlloc : public RegisterRegAllocBase<WWMRegisterRegAlloc> {
+public:
+ WWMRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
+ : RegisterRegAllocBase(N, D, C) {}
+};
+
static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
const MachineRegisterInfo &MRI,
const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
}
-/// -{sgpr|vgpr}-regalloc=... command line option.
+static bool onlyAllocateWWMRegs(const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI,
+ const Register Reg) {
+ const SIMachineFunctionInfo *MFI =
+ MRI.getMF().getInfo<SIMachineFunctionInfo>();
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&
+ MFI->checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+}
+
+/// -{sgpr|wwm|vgpr}-regalloc=... command line option.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
+static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
static SGPRRegisterRegAlloc
defaultSGPRRegAlloc("default",
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use for VGPRs"));
+static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
+ RegisterPassParser<WWMRegisterRegAlloc>>
+ WWMRegAlloc("wwm-regalloc", cl::Hidden,
+ cl::init(&useDefaultRegisterAllocator),
+ cl::desc("Register allocator to use for WWM registers"));
static void initializeDefaultSGPRRegisterAllocatorOnce() {
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
}
}
+static void initializeDefaultWWMRegisterAllocatorOnce() {
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
+
+ if (!Ctor) {
+ Ctor = WWMRegAlloc;
+ WWMRegisterRegAlloc::setDefault(WWMRegAlloc);
+ }
+}
+
static FunctionPass *createBasicSGPRRegisterAllocator() {
return createBasicRegisterAllocator(onlyAllocateSGPRs);
}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
return createFastRegisterAllocator(onlyAllocateVGPRs, true);
}
+static FunctionPass *createBasicWWMRegisterAllocator() {
+ return createBasicRegisterAllocator(onlyAllocateWWMRegs);
+}
+
+static FunctionPass *createGreedyWWMRegisterAllocator() {
+ return createGreedyRegisterAllocator(onlyAllocateWWMRegs);
+}
+
+static FunctionPass *createFastWWMRegisterAllocator() {
+ return createFastRegisterAllocator(onlyAllocateWWMRegs, false);
+}
+
static SGPRRegisterRegAlloc basicRegAllocSGPR(
"basic", "basic register allocator", createBasicSGPRRegisterAllocator);
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
@@ -204,6 +247,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
+static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",
+ "basic register allocator",
+ createBasicWWMRegisterAllocator);
+static WWMRegisterRegAlloc
+ greedyRegAllocWWMReg("greedy", "greedy register allocator",
+ createGreedyWWMRegisterAllocator);
+static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",
+ createFastWWMRegisterAllocator);
} // anonymous namespace
static cl::opt<bool>
@@ -440,6 +491,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
initializeAMDGPULowerModuleLDSLegacyPass(*PR);
initializeAMDGPULowerBufferFatPointersPass(*PR);
+ initializeAMDGPUReserveWWMRegsPass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
@@ -989,6 +1041,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
FunctionPass *createSGPRAllocPass(bool Optimized);
FunctionPass *createVGPRAllocPass(bool Optimized);
+ FunctionPass *createWWMRegAllocPass(bool Optimized);
FunctionPass *createRegAllocPass(bool Optimized) override;
bool addRegAssignAndRewriteFast() override;
@@ -1382,7 +1435,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
}
bool GCNPassConfig::addPreRewrite() {
- addPass(&SILowerWWMCopiesID);
if (EnableRegReassign)
addPass(&GCNNSAReassignID);
return true;
@@ -1418,12 +1470,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
return createFastVGPRRegisterAllocator();
}
+FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {
+ // Initialize the global default.
+ llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,
+ initializeDefaultWWMRegisterAllocatorOnce);
+
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
+
+ if (Optimized)
+ return createGreedyWWMRegisterAllocator();
+
+ return createFastWWMRegisterAllocator();
+}
+
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
llvm_unreachable("should not be used");
}
static const char RegAllocOptNotSupportedMessage[] =
- "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
+ "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
+ "and -vgpr-regalloc";
bool GCNPassConfig::addRegAssignAndRewriteFast() {
if (!usingDefaultRegAlloc())
@@ -1435,11 +1503,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsLegacyID);
+
+ // To Allocate wwm registers used in whole quad mode operations (for shaders).
addPass(&SIPreAllocateWWMRegsID);
- addPass(createVGPRAllocPass(false));
+ // For allocating other wwm register operands.
+ addPass(createWWMRegAllocPass(false));
addPass(&SILowerWWMCopiesID);
+ addPass(&AMDGPUReserveWWMRegsID);
+
+ // For allocating per-thread VGPRs.
+ addPass(createVGPRAllocPass(false));
+
return true;
}
@@ -1459,8 +1535,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsLegacyID);
+
+ // To Allocate wwm registers used in whole quad mode operations (for shaders).
addPass(&SIPreAllocateWWMRegsID);
+ // For allocating other whole wave mode registers.
+ addPass(createWWMRegAllocPass(true));
+ addPass(&SILowerWWMCopiesID);
+ addPass(createVirtRegRewriter(false));
+ addPass(&AMDGPUReserveWWMRegsID);
+
+ // For allocating per-thread VGPRs.
addPass(createVGPRAllocPass(true));
addPreRewrite();
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 4605be344f7316..fed29c3e14aae2 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -95,6 +95,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPURegBankSelect.cpp
AMDGPURegisterBankInfo.cpp
AMDGPURemoveIncompatibleFunctions.cpp
+ AMDGPUReserveWWMRegs.cpp
AMDGPUResourceUsageAnalysis.cpp
AMDGPURewriteOutArguments.cpp
AMDGPURewriteUndefForPHI.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 07505110476b5d..3d1657392884f5 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1341,13 +1341,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- // Allocate spill slots for WWM reserved VGPRs.
- for (Register Reg : FuncInfo->getWWMReservedRegs()) {
- const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
- FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
- TRI->getSpillAlign(*RC));
- }
-
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
&& EnableSpillVGPRToAGPR;
@@ -1573,11 +1566,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
return;
- MFI->shiftSpillPhysVGPRsToLowestRange(MF);
-
TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
- if (MFI->isEntryFunction())
- return;
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -1587,19 +1576,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
MachineInstr *ReturnMI = nullptr;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- // WRITELANE instructions used for SGPR spills can overwrite the inactive
- // lanes of VGPRs and callee must spill and restore them even if they are
- // marked Caller-saved.
-
- // TODO: Handle this elsewhere at an early point. Walking through all MBBs
- // here would be a bad heuristic. A better way should be by calling
- // allocateWWMSpill during the regalloc pipeline whenever a physical
- // register is allocated for the intended virtual registers.
- if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
- MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
- else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
- MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
- else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
+ // TODO: Walking through all MBBs here would be a bad heuristic. Better
+ // handle them elsewhere.
+ if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
NeedExecCopyReservedReg = true;
else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
@@ -1614,6 +1593,23 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ SmallVector<Register> SortedWWMVGPRs;
+ for (Register Reg : MFI->getWWMReservedRegs()) {
+ // The shift-back is needed only for the VGPRs used for SGPR spills and they
+ // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
+ // reserved registers.
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
+ if (TRI->getRegSizeInBits(*RC) > 32)
+ continue;
+ SortedWWMVGPRs.push_back(Reg);
+ }
+
+ sort(SortedWWMVGPRs, std::greater<Register>());
+ MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
+
+ if (MFI->isEntryFunction())
+ return;
+
// Remove any VGPRs used in the return value because these do not need to be saved.
// This prevents CSR restore from clobbering return VGPRs.
if (ReturnMI) {
@@ -1623,6 +1619,13 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+ // Create the stack objects for WWM registers now.
+ for (Register Reg : MFI->getWWMReservedRegs()) {
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
+ MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
+ TRI->getSpillAlign(*RC));
+ }
+
// Ignore the SGPRs the default implementation found.
SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
@@ -1638,14 +1641,6 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
// allow the default insertion to handle them.
for (auto &Reg : MFI->getWWMSpills())
SavedVGPRs.reset(Reg.first);
-
- // Mark all lane VGPRs as BB LiveIns.
- for (MachineBasicBlock &MBB : MF) {
- for (auto &Reg : MFI->getWWMSpills())
- MBB.addLiveIn(Reg.first);
-
- MBB.sortUniqueLiveIns();
- }
}
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 9afb29d95abd7d..8073aca7f197fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -931,6 +931,7 @@ def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
+ let hasExtraDefRegAllocReq = 1;
let Constraints = "$vdst = $vdst_in";
}
@@ -941,6 +942,7 @@ def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst),
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
+ let hasExtraSrcRegAllocReq = 1;
}
} // End Spill = 1, VALU = 1, isConvergent = 1
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 35e5bea9ae16e2..822336ebaf5dc2 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -21,6 +21,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/InitializePasses.h"
@@ -33,12 +34,18 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
namespace {
+static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
+ "amdgpu-num-vgprs-for-wwm-alloc",
+ cl::desc("Max num VGPRs for whole-wave register allocation."),
+ cl::ReallyHidden, cl::init(10));
+
class SILowerSGPRSpills {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
LiveIntervals *LIS = nullptr;
SlotIndexes *Indexes = nullptr;
+ MachineDominatorTree *MDT = nullptr;
// Save and Restore blocks of the current function. Typically there is a
// single save block, unless Windows EH funclets are involved.
@@ -46,13 +53,17 @@ class SILowerSGPRSpills {
MBBVector RestoreBlocks;
public:
- SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes)
- : LIS(LIS), Indexes(Indexes) {}
+ SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
+ MachineDominatorTree *MDT)
+ : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
bool run(MachineFunction &MF);
void calculateSaveRestoreBlocks(MachineFunction &MF);
bool spillCalleeSavedRegs(MachineFunction &MF,
SmallVectorImpl<int> &CalleeSavedFIs);
- void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
+ void updateLaneVGPRDomInstr(
+ int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
+ DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
+ void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
};
class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
@@ -64,6 +75,7 @@ class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTreeWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -84,6 +96,7 @@ INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
"SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
"SI lower SGPR spill instructions", false, false)
@@ -266,51 +279,90 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
return false;
}
-void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
- LiveIntervals *LIS) {
- // TODO: This is a workaround to avoid the unmodelled liveness computed with
- // whole-wave virtual registers when allocated together with the regular VGPR
- // virtual registers. Presently, the liveness computed during the regalloc is
- // only uniform (or single lane aware) and it doesn't take account of the
- // divergent control flow that exists for our GPUs. Since the WWM registers
- // can modify inactive lanes, the wave-aware liveness should be computed for
- // the virtual registers to accurately plot their interferences. Without
- // having the divergent CFG for the function, it is difficult to implement the
- // wave-aware liveness info. Until then, we conservatively extend the liveness
- // of the wwm registers into the entire function so that they won't be reused
- // without first spilling/splitting their liveranges.
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
- for (auto Reg : MFI->getSGPRSpillVGPRs()) {
- for (MachineBasicBlock *SaveBlock : SaveBlocks) {
- MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
- DebugLoc DL = SaveBlock->findDebugLoc(InsertBefore);
- auto MIB = BuildMI(*SaveBlock, InsertBefore, DL,
- TII->get(AMDGPU::IMPLICIT_DEF), Reg);
- MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
- // Set SGPR_SPILL asm printer flag
- MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
- if (LIS) {
- LIS->InsertMachineInstrInMaps(*MIB);
+void SILowerSGPRSpills::updateLaneVGPRDomInstr(
+ int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
+ DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
+ // For the Def of a virtual LaneVPGR to dominate all its uses, we should
+ // insert an IMPLICIT_DEF before the dominating spill. Switching to a
+ // depth first order doesn't really help since the machine function can be in
+ // the unstructured control flow post-SSA. For each virtual register, hence
+ // finding the common dominator to get either the dominating spill or a block
+ // dominating all spills.
+ SIMachineFunctionInfo *FuncInfo =
+ MBB->getParent()->getInfo<SIMachineFunctionInfo>();
+ ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills =
+ FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI);
+ Register PrevLaneVGPR;
+ for (auto &Spill : VGPRSpills) {
+ if (PrevLaneVGPR == Spill.VGPR)
+ continue;
+
+ PrevLaneVGPR = Spill.VGPR;
+ auto I = LaneVGPRDomInstr.find(Spill.VGPR);
+ if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
+ // Initially add the spill instruction itself for Insertion point.
+ LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
+ } else {
+ assert(I != LaneVGPRDomInstr.end());
+ auto PrevInsertPt = I->second;
+ MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
+ if (DomMBB == MBB) {
+ // The insertion point earlier selected in a predecessor block whose
+ // spills are currently being lowered. The earlier InsertPt would be
+ // the one just before the block terminator and it should be changed
+ // if we insert any new spill in it.
+ if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
+ I->second = InsertPt;
+
+ continue;
}
+
+ // Find the common dominator block between PrevInsertPt and the
+ // current spill.
+ DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
+ if (DomMBB == MBB)
+ I->second = InsertPt;
+ else if (DomMBB != PrevInsertPt->getParent())
+ I->second = &(*DomMBB->getFirstTerminator());
}
}
+}
- // Insert the KILL in the return blocks to extend their liveness untill the
- // end of function. Insert a separate KILL for each VGPR.
- for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
- MachineBasicBlock::iterator InsertBefore =
- RestoreBlock->getFirstTerminator();
- DebugLoc DL = RestoreBlock->findDebugLoc(InsertBefore);
- for (auto Reg : MFI->getSGPRSpillVGPRs()) {
- auto MIB = BuildMI(*RestoreBlock, InsertBefore, DL,
- TII->get(TargetOpcode::KILL));
- MIB.addReg(Reg);
- if (LIS)
- LIS->InsertMachineInstrInMaps(*MIB);
+void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF,
+ BitVector &RegMask) {
+ // Determine an optimal number of VGPRs for WWM allocation. The complement
+ // list will be available for allocating other VGPR virtual registers.
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
+ BitVector NonWwmAllocMask(TRI->getNumRegs());
+
+ // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future
+ // to have a balanced allocation between WWM values and per-thread vector
+ // register operands.
+ unsigned NumRegs = MaxNumVGPRsForWwmAllocation;
+ NumRegs =
+ std::min(static_cast<unsigned>(MFI->getSGPRSpillVGPRs().size()), NumRegs);
+
+ auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF);
+ // Try to use the highest available registers for now. Later after
+ // vgpr-regalloc, they can be shifted to the lowest range.
+ unsigned I = 0;
+ for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1;
+ (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) {
+ if (!ReservedRegs.test(Reg) &&
+ !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) {
+ TRI->markSuperRegs(RegMask, Reg);
+ ++I;
}
}
+
+ if (I != NumRegs) {
+ // Reserve an arbitrary register and report the error.
+ TRI->markSuperRegs(RegMask, AMDGPU::VGPR0);
+ MF.getFunction().getContext().emitError(
+ "can't find enough VGPRs for wwm-regalloc");
+ }
}
bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
@@ -318,7 +370,9 @@ bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
- return SILowerSGPRSpills(LIS, Indexes).run(MF);
+ MachineDominatorTree *MDT =
+ &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+ return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
}
bool SILowerSGPRSpills::run(MachineFunction &MF) {
@@ -361,6 +415,9 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
// To track the spill frame indices handled in this pass.
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
+ // To track the IMPLICIT_DEF insertion point for the lane vgprs.
+ DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;
+
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!TII->isSGPRSpill(MI))
@@ -390,6 +447,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
"failed to spill SGPR to physical VGPR lane when allocated");
}
} else {
+ MachineInstrSpan MIS(&MI, &MBB);
if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
MI, FI, nullptr, Indexes, LIS);
@@ -397,21 +455,47 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
llvm_unreachable(
"failed to spill SGPR to virtual VGPR lane when allocated");
SpillFIs.set(FI);
+ updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr);
SpilledToVirtVGPRLanes = true;
}
}
}
}
- if (SpilledToVirtVGPRLanes) {
- extendWWMVirtRegLiveness(MF, LIS);
+ for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
+ auto InsertPt = LaneVGPRDomInstr[Reg];
+ // Insert the IMPLICIT_DEF at the identified points.
+ MachineBasicBlock &Block = *InsertPt->getParent();
+ DebugLoc DL = Block.findDebugLoc(InsertPt);
+ auto MIB =
+ BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);
+
+ // Add WWM flag to the virtual register.
+ FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+
+ // Set SGPR_SPILL asm printer flag
+ MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
if (LIS) {
- // Compute the LiveInterval for the newly created virtual registers.
- for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
- LIS->createAndComputeVirtRegInterval(Reg);
+ LIS->InsertMachineInstrInMaps(*MIB);
+ LIS->createAndComputeVirtRegInterval(Reg);
}
}
+ // Determine the registers for WWM allocation and also compute the register
+ // mask for non-wwm VGPR allocation.
+ if (FuncInfo->getSGPRSpillVGPRs().size()) {
+ BitVector WwmRegMask(TRI->getNumRegs());
+
+ determineRegsForWWMAllocation(MF, WwmRegMask);
+
+ BitVector NonWwmRegMask(WwmRegMask);
+ NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask());
+
+ // The complement set will be the registers for non-wwm (per-thread) vgpr
+ // allocation.
+ FuncInfo->updateNonWWMRegMask(NonWwmRegMask);
+ }
+
for (MachineBasicBlock &MBB : MF) {
// FIXME: The dead frame indices are replaced with a null register from
// the debug value instructions. We should instead, update it with the
@@ -468,6 +552,7 @@ SILowerSGPRSpillsPass::run(MachineFunction &MF,
MFPropsModifier _(*this, MF);
auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
- SILowerSGPRSpills(LIS, Indexes).run(MF);
+ MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
+ SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index f59d29bd81403a..8be9a082a7fd08 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -325,11 +325,13 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
return false;
}
-void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
- MachineFunction &MF) {
+void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange(
+ MachineFunction &MF, SmallVectorImpl<Register> &WWMVGPRs,
+ BitVector &SavedVGPRs) {
const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (Register &Reg : SpillPhysVGPRs) {
+ for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) {
+ Register Reg = WWMVGPRs[I];
Register NewReg =
TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (!NewReg || NewReg >= Reg)
@@ -338,10 +340,22 @@ void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
MRI.replaceRegWith(Reg, NewReg);
// Update various tables with the new VGPR.
+ WWMVGPRs[I] = NewReg;
WWMReservedRegs.remove(Reg);
WWMReservedRegs.insert(NewReg);
- WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
- WWMSpills.erase(Reg);
+ MRI.reserveReg(NewReg, TRI);
+
+ // Replace the register in SpillPhysVGPRs. This is needed to look for free
+ // lanes while spilling special SGPRs like FP, BP, etc. during PEI.
+ auto RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg);
+ if (RegItr != SpillPhysVGPRs.end()) {
+ unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr);
+ SpillPhysVGPRs[Idx] = NewReg;
+ }
+
+ // The generic `determineCalleeSaves` might have set the old register if it
+ // is in the CSR range.
+ SavedVGPRs.reset(Reg);
for (MachineBasicBlock &MBB : MF) {
MBB.removeLiveIn(Reg);
@@ -386,7 +400,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
return false;
}
- allocateWWMSpill(MF, LaneVGPR);
+ if (IsPrologEpilog)
+ allocateWWMSpill(MF, LaneVGPR);
+
reserveWWMRegister(LaneVGPR);
for (MachineBasicBlock &MBB : MF) {
MBB.addLiveIn(LaneVGPR);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index aff0b34947d688..669f98dd865d61 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -524,6 +524,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
// the VGPR and its stack slot index.
WWMSpillsMap WWMSpills;
+ // Before allocation, the VGPR registers are partitioned into two distinct
+ // sets, the first one for WWM-values and the second set for non-WWM values.
+ // The latter set should be reserved during WWM-regalloc.
+ BitVector NonWWMRegMask;
+
using ReservedRegSet = SmallSetVector<Register, 8>;
// To track the VGPRs reserved for WWM instructions. They get stack slots
// later during PrologEpilogInserter and get added into the superset WWMSpills
@@ -590,6 +595,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
+ void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; }
+ BitVector getNonWWMRegMask() const { return NonWWMRegMask; }
+ void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); }
+
SIModeRegisterDefaults getMode() const { return Mode; }
ArrayRef<SIRegisterInfo::SpilledReg>
@@ -729,9 +738,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
I->second.IsDead = true;
}
- // To bring the Physical VGPRs in the highest range allocated for CSR SGPR
- // spilling into the lowest available range.
- void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF);
+ // To bring the allocated WWM registers in \p WWMVGPRs to the lowest available
+ // range.
+ void shiftWwmVGPRsToLowestRange(MachineFunction &MF,
+ SmallVectorImpl<Register> &WWMVGPRs,
+ BitVector &SavedVGPRs);
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
bool SpillToPhysVGPRLane = false,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2d1cd1bda3afe1..d7421a1ceff0f4 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -561,6 +561,37 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
}
+std::pair<unsigned, unsigned>
+SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxNumAGPRs = MaxNumVGPRs;
+ unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
+ // a wave may have up to 512 total vector registers combining together both
+ // VGPRs and AGPRs. Hence, in an entry function without calls and without
+ // AGPRs used within it, it is possible to use the whole vector register
+ // budget for VGPRs.
+ //
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
+ // register file accordingly.
+ if (ST.hasGFX90AInsts()) {
+ if (MFI->usesAGPRs(MF)) {
+ MaxNumVGPRs /= 2;
+ MaxNumAGPRs = MaxNumVGPRs;
+ } else {
+ if (MaxNumVGPRs > TotalNumVGPRs) {
+ MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+ MaxNumVGPRs = TotalNumVGPRs;
+ } else
+ MaxNumAGPRs = 0;
+ }
+ }
+
+ return std::pair(MaxNumVGPRs, MaxNumAGPRs);
+}
+
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::MODE);
@@ -668,30 +699,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve VGPRs/AGPRs.
//
- unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
- unsigned MaxNumAGPRs = MaxNumVGPRs;
- unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
-
- // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
- // a wave may have up to 512 total vector registers combining together both
- // VGPRs and AGPRs. Hence, in an entry function without calls and without
- // AGPRs used within it, it is possible to use the whole vector register
- // budget for VGPRs.
- //
- // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
- // register file accordingly.
- if (ST.hasGFX90AInsts()) {
- if (MFI->usesAGPRs(MF)) {
- MaxNumVGPRs /= 2;
- MaxNumAGPRs = MaxNumVGPRs;
- } else {
- if (MaxNumVGPRs > TotalNumVGPRs) {
- MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
- MaxNumVGPRs = TotalNumVGPRs;
- } else
- MaxNumAGPRs = 0;
- }
- }
+ auto [MaxNumVGPRs, MaxNumAGPRs] = getMaxNumVectorRegs(MF);
for (const TargetRegisterClass *RC : regclasses()) {
if (RC->isBaseClass() && isVGPRClass(RC)) {
@@ -724,6 +732,18 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
}
+ // During wwm-regalloc, reserve the registers for perlane VGPR allocation. The
+ // MFI->getNonWWMRegMask() field will have a valid bitmask only during
+ // wwm-regalloc and it would be empty otherwise.
+ BitVector NonWWMRegMask = MFI->getNonWWMRegMask();
+ if (!NonWWMRegMask.empty()) {
+ for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
+ RegI < RegE; ++RegI) {
+ if (NonWWMRegMask.test(RegI))
+ reserveRegisterTuples(Reserved, RegI);
+ }
+ }
+
for (Register Reg : MFI->getWWMReservedRegs())
reserveRegisterTuples(Reserved, Reg);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 88d5686720985e..409e5418abc8ec 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -82,6 +82,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
/// spilling is needed.
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
+ /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
+ /// of waves per execution unit required for the function \p MF.
+ std::pair<unsigned, unsigned>
+ getMaxNumVectorRegs(const MachineFunction &MF) const;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
bool isAsmClobberable(const MachineFunction &MF,
MCRegister PhysReg) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index de973481f82308..e9e7360733581a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -12,97 +12,90 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b32 s4, -1
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s4
-; CHECK-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; CHECK-NEXT: v_mov_b32_e32 v8, v0
-; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b32 exec_lo, s21
-; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v15, v1
-; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v14, v2
-; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v13, v3
-; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v12, v4
-; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v11, v5
-; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v10, v6
-; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; CHECK-NEXT: v_mov_b32_e32 v9, v7
-; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; CHECK-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; CHECK-NEXT: v_mov_b32_e32 v2, v15
-; CHECK-NEXT: v_mov_b32_e32 v3, v14
-; CHECK-NEXT: v_mov_b32_e32 v4, v13
-; CHECK-NEXT: v_mov_b32_e32 v5, v12
-; CHECK-NEXT: v_mov_b32_e32 v6, v11
-; CHECK-NEXT: v_mov_b32_e32 v7, v10
-; CHECK-NEXT: v_mov_b32_e32 v8, v9
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v14, v1
+; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v13, v2
+; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v12, v3
+; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v11, v4
+; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v10, v5
+; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v9, v6
+; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v8, v7
+; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
+; CHECK-NEXT: v_mov_b32_e32 v1, v14
+; CHECK-NEXT: v_mov_b32_e32 v2, v13
+; CHECK-NEXT: v_mov_b32_e32 v3, v12
+; CHECK-NEXT: v_mov_b32_e32 v4, v11
+; CHECK-NEXT: v_mov_b32_e32 v5, v10
+; CHECK-NEXT: v_mov_b32_e32 v6, v9
+; CHECK-NEXT: v_mov_b32_e32 v7, v8
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: s_mov_b32 s4, s8
; CHECK-NEXT: s_mov_b32 s5, s8
; CHECK-NEXT: s_mov_b32 s6, s8
; CHECK-NEXT: s_mov_b32 s7, s8
-; CHECK-NEXT: v_writelane_b32 v0, s4, 0
-; CHECK-NEXT: v_writelane_b32 v0, s5, 1
-; CHECK-NEXT: v_writelane_b32 v0, s6, 2
-; CHECK-NEXT: v_writelane_b32 v0, s7, 3
+; CHECK-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane
+; CHECK-NEXT: v_writelane_b32 v16, s4, 0
+; CHECK-NEXT: v_writelane_b32 v16, s5, 1
+; CHECK-NEXT: v_writelane_b32 v16, s6, 2
+; CHECK-NEXT: v_writelane_b32 v16, s7, 3
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s4, s6
; CHECK-NEXT: s_mov_b32 s5, s6
-; CHECK-NEXT: v_mov_b32_e32 v1, s4
-; CHECK-NEXT: v_mov_b32_e32 v2, s5
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_mov_b32_e32 v1, s5
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 s4, exec_lo
-; CHECK-NEXT: v_writelane_b32 v0, s4, 4
+; CHECK-NEXT: v_writelane_b32 v16, s4, 4
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s21
; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b32 exec_lo, s21
-; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_readfirstlane_b32 s12, v8
-; CHECK-NEXT: v_readfirstlane_b32 s10, v7
-; CHECK-NEXT: v_readfirstlane_b32 s9, v6
-; CHECK-NEXT: v_readfirstlane_b32 s8, v5
-; CHECK-NEXT: v_readfirstlane_b32 s7, v4
-; CHECK-NEXT: v_readfirstlane_b32 s6, v3
-; CHECK-NEXT: v_readfirstlane_b32 s5, v2
-; CHECK-NEXT: v_readfirstlane_b32 s4, v1
+; CHECK-NEXT: s_waitcnt vmcnt(1)
+; CHECK-NEXT: v_readfirstlane_b32 s12, v7
+; CHECK-NEXT: v_readfirstlane_b32 s10, v6
+; CHECK-NEXT: v_readfirstlane_b32 s9, v5
+; CHECK-NEXT: v_readfirstlane_b32 s8, v4
+; CHECK-NEXT: v_readfirstlane_b32 s7, v3
+; CHECK-NEXT: v_readfirstlane_b32 s6, v2
+; CHECK-NEXT: v_readfirstlane_b32 s5, v1
+; CHECK-NEXT: v_readfirstlane_b32 s4, v0
; CHECK-NEXT: ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
; CHECK-NEXT: s_mov_b32 s13, s10
; CHECK-NEXT: s_mov_b32 s14, s9
@@ -111,59 +104,59 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: s_mov_b32 s17, s6
; CHECK-NEXT: s_mov_b32 s18, s5
; CHECK-NEXT: s_mov_b32 s19, s4
-; CHECK-NEXT: v_writelane_b32 v0, s12, 5
-; CHECK-NEXT: v_writelane_b32 v0, s13, 6
-; CHECK-NEXT: v_writelane_b32 v0, s14, 7
-; CHECK-NEXT: v_writelane_b32 v0, s15, 8
-; CHECK-NEXT: v_writelane_b32 v0, s16, 9
-; CHECK-NEXT: v_writelane_b32 v0, s17, 10
-; CHECK-NEXT: v_writelane_b32 v0, s18, 11
-; CHECK-NEXT: v_writelane_b32 v0, s19, 12
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_writelane_b32 v16, s12, 5
+; CHECK-NEXT: v_writelane_b32 v16, s13, 6
+; CHECK-NEXT: v_writelane_b32 v16, s14, 7
+; CHECK-NEXT: v_writelane_b32 v16, s15, 8
+; CHECK-NEXT: v_writelane_b32 v16, s16, 9
+; CHECK-NEXT: v_writelane_b32 v16, s17, 10
+; CHECK-NEXT: v_writelane_b32 v16, s18, 11
+; CHECK-NEXT: v_writelane_b32 v16, s19, 12
+; CHECK-NEXT: v_mov_b32_e32 v6, v8
; CHECK-NEXT: v_mov_b32_e32 v7, v9
-; CHECK-NEXT: v_mov_b32_e32 v8, v10
+; CHECK-NEXT: v_mov_b32_e32 v4, v10
; CHECK-NEXT: v_mov_b32_e32 v5, v11
-; CHECK-NEXT: v_mov_b32_e32 v6, v12
+; CHECK-NEXT: v_mov_b32_e32 v2, v12
; CHECK-NEXT: v_mov_b32_e32 v3, v13
-; CHECK-NEXT: v_mov_b32_e32 v4, v14
+; CHECK-NEXT: v_mov_b32_e32 v0, v14
; CHECK-NEXT: v_mov_b32_e32 v1, v15
-; CHECK-NEXT: v_mov_b32_e32 v2, v16
; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13]
; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15]
; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17]
; CHECK-NEXT: s_mov_b64 s[6:7], s[18:19]
-; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[7:8]
-; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[5:6]
+; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[6:7]
+; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[4:5]
; CHECK-NEXT: s_and_b32 s4, s4, s5
-; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[3:4]
+; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[2:3]
; CHECK-NEXT: s_and_b32 s4, s4, s5
-; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[1:2]
+; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1]
; CHECK-NEXT: s_and_b32 s4, s4, s5
; CHECK-NEXT: s_and_saveexec_b32 s4, s4
-; CHECK-NEXT: v_writelane_b32 v0, s4, 13
+; CHECK-NEXT: v_writelane_b32 v16, s4, 13
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s21
; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b32 exec_lo, s21
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_readlane_b32 s4, v2, 13
-; CHECK-NEXT: v_readlane_b32 s8, v2, 5
-; CHECK-NEXT: v_readlane_b32 s9, v2, 6
-; CHECK-NEXT: v_readlane_b32 s10, v2, 7
-; CHECK-NEXT: v_readlane_b32 s11, v2, 8
-; CHECK-NEXT: v_readlane_b32 s12, v2, 9
-; CHECK-NEXT: v_readlane_b32 s13, v2, 10
-; CHECK-NEXT: v_readlane_b32 s14, v2, 11
-; CHECK-NEXT: v_readlane_b32 s15, v2, 12
-; CHECK-NEXT: v_readlane_b32 s16, v2, 0
-; CHECK-NEXT: v_readlane_b32 s17, v2, 1
-; CHECK-NEXT: v_readlane_b32 s18, v2, 2
-; CHECK-NEXT: v_readlane_b32 s19, v2, 3
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: s_or_saveexec_b32 s21, -1
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: s_mov_b32 exec_lo, s21
; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_readlane_b32 s4, v16, 13
+; CHECK-NEXT: v_readlane_b32 s8, v16, 5
+; CHECK-NEXT: v_readlane_b32 s9, v16, 6
+; CHECK-NEXT: v_readlane_b32 s10, v16, 7
+; CHECK-NEXT: v_readlane_b32 s11, v16, 8
+; CHECK-NEXT: v_readlane_b32 s12, v16, 9
+; CHECK-NEXT: v_readlane_b32 s13, v16, 10
+; CHECK-NEXT: v_readlane_b32 s14, v16, 11
+; CHECK-NEXT: v_readlane_b32 s15, v16, 12
+; CHECK-NEXT: v_readlane_b32 s16, v16, 0
+; CHECK-NEXT: v_readlane_b32 s17, v16, 1
+; CHECK-NEXT: v_readlane_b32 s18, v16, 2
+; CHECK-NEXT: v_readlane_b32 s19, v16, 3
; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
@@ -171,24 +164,19 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: s_cbranch_execnz .LBB0_1
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b32 exec_lo, s21
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_readlane_b32 s4, v0, 4
+; CHECK-NEXT: v_readlane_b32 s4, v16, 4
; CHECK-NEXT: s_mov_b32 exec_lo, s4
; CHECK-NEXT: ; %bb.4:
-; CHECK-NEXT: s_or_saveexec_b32 s21, -1
-; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b32 exec_lo, s21
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: v_mov_b32_e32 v1, s4
; CHECK-NEXT: v_mov_b32_e32 v2, s4
; CHECK-NEXT: v_mov_b32_e32 v3, s4
-; CHECK-NEXT: ; kill: killed $vgpr4
; CHECK-NEXT: s_xor_saveexec_b32 s4, -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b32 exec_lo, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index 9794130d2b0007..c91b686697b9df 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -20,7 +20,7 @@ body: |
; GFX908-LABEL: name: agpr32_restore_clobber_scc
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -36,7 +36,7 @@ body: |
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -514,7 +514,7 @@ body: |
; GFX908-FLATSCR-LABEL: name: agpr32_restore_clobber_scc
; GFX908-FLATSCR: bb.0:
; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -531,7 +531,7 @@ body: |
; GFX908-FLATSCR-NEXT: S_NOP 0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: bb.2:
- ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -1038,7 +1038,7 @@ body: |
; GFX908-LABEL: name: agpr64_restore_clobber_scc
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1056,7 +1056,7 @@ body: |
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -1535,7 +1535,7 @@ body: |
; GFX908-FLATSCR-LABEL: name: agpr64_restore_clobber_scc
; GFX908-FLATSCR: bb.0:
; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1554,7 +1554,7 @@ body: |
; GFX908-FLATSCR-NEXT: S_NOP 0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: bb.2:
- ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -2061,7 +2061,7 @@ body: |
; GFX908-LABEL: name: agpr96_restore_clobber_scc
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -2081,7 +2081,7 @@ body: |
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -2561,7 +2561,7 @@ body: |
; GFX908-FLATSCR-LABEL: name: agpr96_restore_clobber_scc
; GFX908-FLATSCR: bb.0:
; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -2582,7 +2582,7 @@ body: |
; GFX908-FLATSCR-NEXT: S_NOP 0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: bb.2:
- ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -3089,7 +3089,7 @@ body: |
; GFX908-LABEL: name: agpr32_save_clobber_scc
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$agpr0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -3105,7 +3105,7 @@ body: |
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$agpr0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -3583,7 +3583,7 @@ body: |
; GFX908-FLATSCR-LABEL: name: agpr32_save_clobber_scc
; GFX908-FLATSCR: bb.0:
; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-FLATSCR-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -3600,7 +3600,7 @@ body: |
; GFX908-FLATSCR-NEXT: S_NOP 0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: bb.2:
- ; GFX908-FLATSCR-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -4106,7 +4106,7 @@ body: |
; GFX908-LABEL: name: agpr64_save_clobber_scc
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$agpr0_agpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -4124,7 +4124,7 @@ body: |
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$agpr0_agpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -4603,7 +4603,7 @@ body: |
; GFX908-FLATSCR-LABEL: name: agpr64_save_clobber_scc
; GFX908-FLATSCR: bb.0:
; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -4622,7 +4622,7 @@ body: |
; GFX908-FLATSCR-NEXT: S_NOP 0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: bb.2:
- ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -5127,7 +5127,7 @@ body: |
; GFX908-LABEL: name: agpr96_save_clobber_scc
; GFX908: bb.0:
; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$agpr0_agpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -5147,7 +5147,7 @@ body: |
; GFX908-NEXT: S_NOP 0
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: bb.2:
- ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$agpr0_agpr1
; GFX908-NEXT: {{ $}}
; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
@@ -5627,7 +5627,7 @@ body: |
; GFX908-FLATSCR-LABEL: name: agpr96_save_clobber_scc
; GFX908-FLATSCR: bb.0:
; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -5648,7 +5648,7 @@ body: |
; GFX908-FLATSCR-NEXT: S_NOP 0
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: bb.2:
- ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
; GFX908-FLATSCR-NEXT: {{ $}}
; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
;
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
index 80923dfc6f522e..3c3c9839755a25 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s
---
# GCN-LABEL: name: alloc_vgpr_64
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
index 0f0cd0e8171d10..c42b570b40812c 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX90A %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX90A %s
# Using the unaligned vector tuples are OK as long as they aren't used
# in a real instruction.
diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
index 3ed2cb856eaea8..2b98f61748066f 100644
--- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,1 -o - %s | FileCheck -check-prefix=REGALLOC %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,2 -o - %s | FileCheck -check-prefix=REGALLOC %s
; Test to check if the bb prolog spills are inserted correctly during regalloc.
define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
@@ -8,22 +8,20 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
; REGALLOC-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; REGALLOC-NEXT: {{ $}}
- ; REGALLOC-NEXT: renamable $vgpr3 = IMPLICIT_DEF
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; REGALLOC-NEXT: renamable $vgpr1 = COPY $vgpr0
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49
- ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec
+ ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr0, killed $sgpr4, implicit $exec
; REGALLOC-NEXT: renamable $sgpr6 = IMPLICIT_DEF
- ; REGALLOC-NEXT: renamable $vgpr1 = COPY killed renamable $sgpr6
- ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; REGALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6
+ ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 renamable $sgpr4_sgpr5, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
- ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
- ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7
- ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr63 = IMPLICIT_DEF
+ ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr63, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
+ ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr63, implicit killed $sgpr6_sgpr7
+ ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; REGALLOC-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5
; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
; REGALLOC-NEXT: S_BRANCH %bb.3
@@ -31,16 +29,16 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: bb.1.Flow:
; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; REGALLOC-NEXT: {{ $}}
- ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
- ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
+ ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0, implicit-def $sgpr4_sgpr5
+ ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
- ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+ ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
- ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
- ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5
- ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr63, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr63, implicit $sgpr4_sgpr5
+ ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; REGALLOC-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
; REGALLOC-NEXT: S_BRANCH %bb.2
@@ -64,13 +62,12 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: S_BRANCH %bb.1
; REGALLOC-NEXT: {{ $}}
; REGALLOC-NEXT: bb.4.bb.3:
- ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
- ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
- ; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+ ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2, implicit-def $sgpr4_sgpr5
+ ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 3
+ ; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec
- ; REGALLOC-NEXT: KILL killed renamable $vgpr1
; REGALLOC-NEXT: SI_RETURN implicit killed $vgpr0
bb.0:
%cmp = icmp slt i32 %arg0, 50
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
index adfc177c8bf749..0047b6b0ee9348 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -971,12 +971,12 @@ define void @spill_func(ptr addrspace(1) %arg) #0 {
; CHECK-NEXT: v_writelane_b32 v1, s98, 3
; CHECK-NEXT: v_writelane_b32 v0, s92, 61
; CHECK-NEXT: v_writelane_b32 v1, s99, 4
+; CHECK-NEXT: s_mov_b32 s49, s12
; CHECK-NEXT: v_writelane_b32 v0, s93, 62
; CHECK-NEXT: v_writelane_b32 v1, s100, 5
-; CHECK-NEXT: s_mov_b32 s49, s12
+; CHECK-NEXT: s_cmp_eq_u32 s49, 0
; CHECK-NEXT: v_writelane_b32 v0, s94, 63
; CHECK-NEXT: v_writelane_b32 v1, s101, 6
-; CHECK-NEXT: s_cmp_eq_u32 s49, 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
index b46cdb8ab3ba0a..3e25904aa044dd 100644
--- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
@@ -36,66 +36,56 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s9
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0
; GCN_DBG-NEXT: s_load_dword s1, s[2:3], 0xa
; GCN_DBG-NEXT: s_mov_b32 s0, 0
; GCN_DBG-NEXT: s_mov_b32 s2, -1
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
; GCN_DBG-NEXT: s_cmp_lg_u32 s1, s2
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_mov_b64 s[4:5], exec
; GCN_DBG-NEXT: s_mov_b64 exec, -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_cbranch_scc1 .LBB0_2
; GCN_DBG-NEXT: ; %bb.1: ; %for.exit
-; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT: ; kill: killed $vgpr0
; GCN_DBG-NEXT: s_endpgm
; GCN_DBG-NEXT: .LBB0_2: ; %for.body
; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0
; GCN_DBG-NEXT: s_mov_b32 s1, 2
; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 s2, 0x80
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_read_b32 v1, v1
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_read_b32 v0, v0
; GCN_DBG-NEXT: s_mov_b32 s2, 1.0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_write_b32 v1, v2
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_write_b32 v0, v1
; GCN_DBG-NEXT: s_mov_b32 s1, 1
; GCN_DBG-NEXT: s_add_i32 s0, s0, s1
; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1
; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_cbranch_vccnz .LBB0_2
; GCN_DBG-NEXT: ; %bb.3: ; %DummyReturnBlock
-; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT: ; kill: killed $vgpr0
; GCN_DBG-NEXT: s_endpgm
entry:
%cmp = icmp eq i32 %n, -1
@@ -144,53 +134,48 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s9
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0
; GCN_DBG-NEXT: s_mov_b32 s0, 0
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_branch .LBB1_2
; GCN_DBG-NEXT: .LBB1_1: ; %for.exit
-; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT: ; kill: killed $vgpr0
; GCN_DBG-NEXT: s_endpgm
; GCN_DBG-NEXT: .LBB1_2: ; %for.body
; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0
; GCN_DBG-NEXT: s_mov_b32 s1, 2
; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 s2, 0x80
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_read_b32 v1, v1
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_read_b32 v0, v0
; GCN_DBG-NEXT: s_mov_b32 s2, 1.0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_write_b32 v1, v2
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_write_b32 v0, v1
; GCN_DBG-NEXT: s_mov_b32 s1, 1
; GCN_DBG-NEXT: s_add_i32 s0, s0, s1
; GCN_DBG-NEXT: s_mov_b64 s[2:3], 0
; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_cbranch_vccnz .LBB1_1
; GCN_DBG-NEXT: s_branch .LBB1_2
@@ -232,53 +217,48 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s9
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0
; GCN_DBG-NEXT: s_mov_b32 s0, 0
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_branch .LBB2_2
; GCN_DBG-NEXT: .LBB2_1: ; %for.exit
-; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT: ; kill: killed $vgpr0
; GCN_DBG-NEXT: s_endpgm
; GCN_DBG-NEXT: .LBB2_2: ; %for.body
; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0
; GCN_DBG-NEXT: s_mov_b32 s1, 2
; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 s2, 0x80
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_read_b32 v1, v1
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_read_b32 v0, v0
; GCN_DBG-NEXT: s_mov_b32 s2, 1.0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_write_b32 v1, v2
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_write_b32 v0, v1
; GCN_DBG-NEXT: s_mov_b32 s1, 1
; GCN_DBG-NEXT: s_add_i32 s0, s0, s1
; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1
; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_cbranch_vccnz .LBB2_1
; GCN_DBG-NEXT: s_branch .LBB2_2
@@ -321,51 +301,46 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s9
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0
; GCN_DBG-NEXT: s_mov_b32 s0, 0
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_branch .LBB3_2
; GCN_DBG-NEXT: .LBB3_1: ; %for.exit
-; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
-; GCN_DBG-NEXT: ; kill: killed $vgpr0
; GCN_DBG-NEXT: s_endpgm
; GCN_DBG-NEXT: .LBB3_2: ; %for.body
; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1
-; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0
+; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1
+; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0
; GCN_DBG-NEXT: s_mov_b32 s1, 2
; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 s2, 0x80
; GCN_DBG-NEXT: s_add_i32 s1, s1, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_read_b32 v1, v1
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_read_b32 v0, v0
; GCN_DBG-NEXT: s_mov_b32 s2, 1.0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2
+; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_write_b32 v1, v2
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_write_b32 v0, v1
; GCN_DBG-NEXT: s_mov_b32 s1, 1
; GCN_DBG-NEXT: s_add_i32 s0, s0, s1
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5]
; GCN_DBG-NEXT: s_cbranch_scc1 .LBB3_1
; GCN_DBG-NEXT: s_branch .LBB3_2
@@ -422,66 +397,61 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind {
; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000
; GCN_DBG-NEXT: s_add_u32 s12, s12, s9
; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0
-; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9
+; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, 0
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, 0
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: ds_read_u8 v1, v1
+; GCN_DBG-NEXT: ds_read_u8 v0, v0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v1
+; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v0
; GCN_DBG-NEXT: s_and_b32 s0, 1, s0
; GCN_DBG-NEXT: s_cmp_eq_u32 s0, 1
; GCN_DBG-NEXT: s_cselect_b64 s[0:1], -1, 0
; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1
; GCN_DBG-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1
-; GCN_DBG-NEXT: v_writelane_b32 v0, s1, 2
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1
+; GCN_DBG-NEXT: v_writelane_b32 v2, s1, 2
; GCN_DBG-NEXT: s_mov_b32 s0, 0
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3
; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7]
; GCN_DBG-NEXT: s_branch .LBB4_2
; GCN_DBG-NEXT: .LBB4_1: ; %for.exit
-; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7]
-; GCN_DBG-NEXT: ; kill: killed $vgpr0
; GCN_DBG-NEXT: s_endpgm
; GCN_DBG-NEXT: .LBB4_2: ; %for.body
; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN_DBG-NEXT: s_waitcnt expcnt(0)
-; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7]
; GCN_DBG-NEXT: s_waitcnt vmcnt(0)
-; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 3
-; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 1
-; GCN_DBG-NEXT: v_readlane_b32 s3, v0, 2
-; GCN_DBG-NEXT: v_readlane_b32 s4, v0, 0
+; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 3
+; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 1
+; GCN_DBG-NEXT: v_readlane_b32 s3, v2, 2
+; GCN_DBG-NEXT: v_readlane_b32 s4, v2, 0
; GCN_DBG-NEXT: s_mov_b32 s1, 2
; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1
; GCN_DBG-NEXT: s_add_i32 s1, s1, s4
; GCN_DBG-NEXT: s_mov_b32 s4, 0x80
; GCN_DBG-NEXT: s_add_i32 s1, s1, s4
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_read_b32 v1, v1
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_read_b32 v0, v0
; GCN_DBG-NEXT: s_mov_b32 s4, 1.0
; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0)
-; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s4
+; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s4
; GCN_DBG-NEXT: s_mov_b32 m0, -1
-; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1
-; GCN_DBG-NEXT: ds_write_b32 v1, v2
+; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1
+; GCN_DBG-NEXT: ds_write_b32 v0, v1
; GCN_DBG-NEXT: s_mov_b32 s1, 1
; GCN_DBG-NEXT: s_add_i32 s0, s0, s1
; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3]
-; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3
+; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3
; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7]
; GCN_DBG-NEXT: s_cbranch_vccnz .LBB4_1
; GCN_DBG-NEXT: s_branch .LBB4_2
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 67a084068941a4..7cec15ea5be87a 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -48,72 +48,67 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s9
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GCN-O0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 1
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 0
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 1
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
-; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 2
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB0_4
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v1
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
+; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: v_mov_b32_e32 v2, v3
; GCN-O0-NEXT: s_mov_b32 s0, 2
-; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s0
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
-; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[1:2], s0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64
+; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 4
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 5
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 4
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 5
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB0_3
; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -130,26 +125,25 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: .LBB0_3: ; %Flow
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 4
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 5
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 4
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: .LBB0_4: ; %bb.outer.end
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 2
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
-; GCN-O0-NEXT: ds_write_b32 v1, v2
-; GCN-O0-NEXT: ; kill: killed $vgpr0
+; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -222,72 +216,67 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s9
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GCN-O0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 1
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 0
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 1
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
-; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 2
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB1_3
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v1
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: v_mov_b32_e32 v3, v4
+; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: v_mov_b32_e32 v2, v3
; GCN-O0-NEXT: s_mov_b32 s0, 2
-; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s0
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
-; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[1:2], s0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64
+; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 4
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 5
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 4
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 5
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB1_4
; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -305,27 +294,27 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: .LBB1_3: ; %Flow
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 2
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_branch .LBB1_5
; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s2, v0, 4
-; GCN-O0-NEXT: v_readlane_b32 s3, v0, 5
+; GCN-O0-NEXT: v_readlane_b32 s2, v4, 4
+; GCN-O0-NEXT: v_readlane_b32 s3, v4, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3]
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -340,14 +329,10 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-O0-NEXT: s_branch .LBB1_3
; GCN-O0-NEXT: .LBB1_5: ; %bb.outer.end
-; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
-; GCN-O0-NEXT: ds_write_b32 v1, v2
-; GCN-O0-NEXT: ; kill: killed $vgpr0
+; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -433,19 +418,14 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s9
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[0:1]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_writelane_b32 v0, s2, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s3, 1
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: v_writelane_b32 v4, s2, 0
+; GCN-O0-NEXT: v_writelane_b32 v4, s3, 1
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
@@ -453,42 +433,43 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
; GCN-O0-NEXT: s_mov_b32 s4, 2
-; GCN-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v1
+; GCN-O0-NEXT: v_lshlrev_b32_e64 v2, s4, v0
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr4
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GCN-O0-NEXT: v_mov_b32_e32 v4, v2
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GCN-O0-NEXT: v_mov_b32_e32 v3, v1
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[0:3], 0 addr64
; GCN-O0-NEXT: s_mov_b32 s0, 1
-; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 2
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_6
; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 2
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v1, s0
+; GCN-O0-NEXT: s_waitcnt vmcnt(1)
+; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[2:3], exec
; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
-; GCN-O0-NEXT: v_writelane_b32 v0, s2, 4
-; GCN-O0-NEXT: v_writelane_b32 v0, s3, 5
+; GCN-O0-NEXT: s_waitcnt vmcnt(0)
+; GCN-O0-NEXT: v_writelane_b32 v4, s2, 4
+; GCN-O0-NEXT: v_writelane_b32 v4, s3, 5
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_2
@@ -496,31 +477,30 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: .LBB2_2: ; %Flow
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 4
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 5
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 4
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 5
; GCN-O0-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GCN-O0-NEXT: s_and_b64 s[0:1], exec, s[0:1]
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 6
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 7
+; GCN-O0-NEXT: v_writelane_b32 v4, s0, 6
+; GCN-O0-NEXT: v_writelane_b32 v4, s1, 7
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB2_5
; GCN-O0-NEXT: ; %bb.3: ; %bb.then
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -536,16 +516,15 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
; GCN-O0-NEXT: s_branch .LBB2_5
; GCN-O0-NEXT: .LBB2_4: ; %bb.else
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -562,26 +541,25 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: .LBB2_5: ; %Flow1
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 6
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 7
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 6
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 7
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: .LBB2_6: ; %bb.outer.end
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT: v_readlane_b32 s0, v4, 2
+; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
-; GCN-O0-NEXT: ds_write_b32 v1, v2
-; GCN-O0-NEXT: ; kill: killed $vgpr0
+; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -681,51 +659,46 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s9
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x9
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 2
-; GCN-O0-NEXT: v_lshlrev_b32_e64 v3, s0, v1
+; GCN-O0-NEXT: v_lshlrev_b32_e64 v2, s0, v0
; GCN-O0-NEXT: s_mov_b32 s1, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GCN-O0-NEXT: v_mov_b32_e32 v4, v2
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GCN-O0-NEXT: v_mov_b32_e32 v3, v1
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_mov_b32 s2, s4
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v3
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v2
; GCN-O0-NEXT: s_mov_b32 s1, s5
-; GCN-O0-NEXT: v_mov_b32_e32 v6, v4
-; GCN-O0-NEXT: v_add_i32_e64 v5, s[2:3], s2, v2
-; GCN-O0-NEXT: v_mov_b32_e32 v2, s1
-; GCN-O0-NEXT: v_addc_u32_e64 v2, s[2:3], v2, v6, s[2:3]
-; GCN-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GCN-O0-NEXT: v_mov_b32_e32 v6, v2
-; GCN-O0-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT: v_mov_b32_e32 v5, v3
+; GCN-O0-NEXT: v_add_i32_e64 v4, s[2:3], s2, v1
+; GCN-O0-NEXT: v_mov_b32_e32 v1, s1
+; GCN-O0-NEXT: v_addc_u32_e64 v1, s[2:3], v1, v5, s[2:3]
+; GCN-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GCN-O0-NEXT: v_mov_b32_e32 v5, v1
+; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b32 s3, s1
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 0
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
-; GCN-O0-NEXT: v_cmp_lt_u32_e64 s[0:1], v1, s0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64
+; GCN-O0-NEXT: v_cmp_lt_u32_e64 s[0:1], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[2:3], exec
; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
-; GCN-O0-NEXT: s_waitcnt vmcnt(4)
-; GCN-O0-NEXT: v_writelane_b32 v0, s2, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s3, 1
+; GCN-O0-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: v_writelane_b32 v6, s2, 0
+; GCN-O0-NEXT: v_writelane_b32 v6, s3, 1
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_1
@@ -733,28 +706,28 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: .LBB3_1: ; %Flow2
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-O0-NEXT: v_readlane_b32 s0, v6, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v6, 1
; GCN-O0-NEXT: s_or_saveexec_b64 s[0:1], s[0:1]
; GCN-O0-NEXT: s_and_b64 s[0:1], exec, s[0:1]
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT: v_writelane_b32 v6, s0, 2
+; GCN-O0-NEXT: v_writelane_b32 v6, s1, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_8
; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
; GCN-O0-NEXT: s_mov_b32 s4, s2
@@ -763,23 +736,24 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 1
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64 offset:4
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 1
+; GCN-O0-NEXT: s_waitcnt vmcnt(1)
+; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[0:3], 0 addr64 offset:4
; GCN-O0-NEXT: s_mov_b32 s0, 2
-; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 4
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 5
+; GCN-O0-NEXT: s_waitcnt vmcnt(1)
+; GCN-O0-NEXT: v_writelane_b32 v6, s0, 4
+; GCN-O0-NEXT: v_writelane_b32 v6, s1, 5
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_7
; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
@@ -789,19 +763,18 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8
; GCN-O0-NEXT: s_branch .LBB3_7
; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s1, 0xf000
; GCN-O0-NEXT: s_mov_b32 s0, 0
; GCN-O0-NEXT: s_mov_b32 s2, s0
@@ -810,22 +783,23 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s5, s0
; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 offset:12
-; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT: s_waitcnt vmcnt(1)
+; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 offset:12
+; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 6
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 7
+; GCN-O0-NEXT: s_waitcnt vmcnt(1)
+; GCN-O0-NEXT: v_writelane_b32 v6, s0, 6
+; GCN-O0-NEXT: v_writelane_b32 v6, s1, 7
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB3_6
; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s0, 0xf000
; GCN-O0-NEXT: s_mov_b32 s2, 0
@@ -835,43 +809,41 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) {
; GCN-O0-NEXT: s_mov_b32 s1, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: v_mov_b32_e32 v0, 4
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:16
; GCN-O0-NEXT: .LBB3_6: ; %Flow
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 6
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 7
+; GCN-O0-NEXT: v_readlane_b32 s0, v6, 6
+; GCN-O0-NEXT: v_readlane_b32 s1, v6, 7
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_branch .LBB3_1
; GCN-O0-NEXT: .LBB3_7: ; %Flow1
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 4
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 5
+; GCN-O0-NEXT: v_readlane_b32 s0, v6, 4
+; GCN-O0-NEXT: v_readlane_b32 s1, v6, 5
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: .LBB3_8: ; %bb.outer.end
; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT: v_readlane_b32 s0, v6, 2
+; GCN-O0-NEXT: v_readlane_b32 s1, v6, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
-; GCN-O0-NEXT: v_mov_b32_e32 v2, 3
-; GCN-O0-NEXT: v_mov_b32_e32 v1, 0
+; GCN-O0-NEXT: v_mov_b32_e32 v1, 3
+; GCN-O0-NEXT: v_mov_b32_e32 v0, 0
; GCN-O0-NEXT: s_mov_b32 m0, -1
-; GCN-O0-NEXT: ds_write_b32 v1, v2
-; GCN-O0-NEXT: ; kill: killed $vgpr0
+; GCN-O0-NEXT: ds_write_b32 v0, v1
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -935,44 +907,39 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000
; GCN-O0-NEXT: s_add_u32 s12, s12, s9
; GCN-O0-NEXT: s_addc_u32 s13, s13, 0
-; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
-; GCN-O0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 1
-; GCN-O0-NEXT: v_mov_b32_e32 v2, v1
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-O0-NEXT: v_writelane_b32 v3, s0, 0
+; GCN-O0-NEXT: v_writelane_b32 v3, s1, 1
+; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b32 s0, 1
-; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0
+; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0
; GCN-O0-NEXT: s_mov_b64 s[0:1], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3
+; GCN-O0-NEXT: v_writelane_b32 v3, s0, 2
+; GCN-O0-NEXT: v_writelane_b32 v3, s1, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v3, off, s[12:15], 0 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
; GCN-O0-NEXT: s_cbranch_execz .LBB4_2
; GCN-O0-NEXT: ; %bb.1: ; %bb.then
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(0)
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
-; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s0, v3, 0
+; GCN-O0-NEXT: v_readlane_b32 s1, v3, 1
; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; GCN-O0-NEXT: s_mov_b32 s5, s2
; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_ashrrev_i32_e64 v2, 31, v0
; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GCN-O0-NEXT: v_mov_b32_e32 v1, v2
@@ -983,14 +950,13 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
; GCN-O0-NEXT: .LBB4_2: ; %bb.end
; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2
-; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3
+; GCN-O0-NEXT: v_readlane_b32 s0, v3, 2
+; GCN-O0-NEXT: v_readlane_b32 s1, v3, 3
; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
; GCN-O0-NEXT: s_barrier
-; GCN-O0-NEXT: ; kill: killed $vgpr0
; GCN-O0-NEXT: s_endpgm
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -1082,91 +1048,84 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0: ; %bb.0: ; %bb
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-O0-NEXT: v_mov_b32_e32 v1, v0
-; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: s_waitcnt expcnt(1)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 s[4:5], 0
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-O0-NEXT: s_waitcnt vmcnt(1)
-; GCN-O0-NEXT: v_writelane_b32 v0, s6, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s7, 1
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 3
+; GCN-O0-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: v_writelane_b32 v6, s6, 0
+; GCN-O0-NEXT: v_writelane_b32 v6, s7, 1
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 2
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: .LBB5_1: ; %bb1
; GCN-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s8, v0, 2
-; GCN-O0-NEXT: v_readlane_b32 s9, v0, 3
-; GCN-O0-NEXT: v_readlane_b32 s6, v0, 0
-; GCN-O0-NEXT: v_readlane_b32 s7, v0, 1
-; GCN-O0-NEXT: v_writelane_b32 v0, s6, 4
-; GCN-O0-NEXT: v_writelane_b32 v0, s7, 5
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: v_readlane_b32 s8, v6, 2
+; GCN-O0-NEXT: v_readlane_b32 s9, v6, 3
+; GCN-O0-NEXT: v_readlane_b32 s6, v6, 0
+; GCN-O0-NEXT: v_readlane_b32 s7, v6, 1
+; GCN-O0-NEXT: v_writelane_b32 v6, s6, 4
+; GCN-O0-NEXT: v_writelane_b32 v6, s7, 5
; GCN-O0-NEXT: s_mov_b32 s4, 0x207
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, s4
+; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v0, s4
; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 6
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 7
-; GCN-O0-NEXT: v_writelane_b32 v0, s6, 0
-; GCN-O0-NEXT: v_writelane_b32 v0, s7, 1
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 6
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 7
+; GCN-O0-NEXT: v_writelane_b32 v6, s6, 0
+; GCN-O0-NEXT: v_writelane_b32 v6, s7, 1
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-O0-NEXT: v_writelane_b32 v0, s6, 2
-; GCN-O0-NEXT: v_writelane_b32 v0, s7, 3
+; GCN-O0-NEXT: v_writelane_b32 v6, s6, 2
+; GCN-O0-NEXT: v_writelane_b32 v6, s7, 3
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1
; GCN-O0-NEXT: ; %bb.2: ; %bb2
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v0, 6
-; GCN-O0-NEXT: v_readlane_b32 s5, v0, 7
+; GCN-O0-NEXT: v_readlane_b32 s4, v6, 6
+; GCN-O0-NEXT: v_readlane_b32 s5, v6, 7
; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b32 s6, 0
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v1, s6
-; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s6
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 8
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 9
+; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s6
+; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, s6
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 8
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 9
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: s_mov_b32 s8, s4
; GCN-O0-NEXT: s_mov_b32 s9, s4
; GCN-O0-NEXT: s_mov_b32 s10, s4
; GCN-O0-NEXT: s_mov_b32 s11, s4
-; GCN-O0-NEXT: v_mov_b32_e32 v1, s8
-; GCN-O0-NEXT: v_mov_b32_e32 v2, s9
-; GCN-O0-NEXT: v_mov_b32_e32 v3, s10
-; GCN-O0-NEXT: v_mov_b32_e32 v4, s11
-; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GCN-O0-NEXT: v_mov_b32_e32 v0, s8
+; GCN-O0-NEXT: v_mov_b32_e32 v1, s9
+; GCN-O0-NEXT: v_mov_b32_e32 v2, s10
+; GCN-O0-NEXT: v_mov_b32_e32 v3, s11
+; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 s[4:5], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 10
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 11
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 10
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 11
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -1175,31 +1134,31 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: ; implicit-def: $sgpr4
-; GCN-O0-NEXT: v_mov_b32_e32 v1, s4
-; GCN-O0-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen
+; GCN-O0-NEXT: v_mov_b32_e32 v0, s4
+; GCN-O0-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
; GCN-O0-NEXT: s_mov_b32 s4, 0
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_cmp_lt_f32_e64 s[6:7], v1, s4
+; GCN-O0-NEXT: v_cmp_lt_f32_e64 s[6:7], v0, s4
; GCN-O0-NEXT: s_mov_b32 s8, s4
; GCN-O0-NEXT: s_mov_b32 s9, s4
; GCN-O0-NEXT: s_mov_b32 s10, s4
; GCN-O0-NEXT: s_mov_b32 s11, s4
-; GCN-O0-NEXT: v_mov_b32_e32 v1, s8
-; GCN-O0-NEXT: v_mov_b32_e32 v2, s9
-; GCN-O0-NEXT: v_mov_b32_e32 v3, s10
-; GCN-O0-NEXT: v_mov_b32_e32 v4, s11
-; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GCN-O0-NEXT: v_mov_b32_e32 v0, s8
+; GCN-O0-NEXT: v_mov_b32_e32 v1, s9
+; GCN-O0-NEXT: v_mov_b32_e32 v2, s10
+; GCN-O0-NEXT: v_mov_b32_e32 v3, s11
+; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 s[4:5], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 12
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 13
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 12
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 13
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -1217,7 +1176,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: s_mov_b32 s5, s10
; GCN-O0-NEXT: s_mov_b32 s6, s9
; GCN-O0-NEXT: s_mov_b32 s7, s8
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
; GCN-O0-NEXT: v_mov_b32_e32 v0, s4
; GCN-O0-NEXT: v_mov_b32_e32 v1, s5
; GCN-O0-NEXT: v_mov_b32_e32 v2, s6
@@ -1229,69 +1188,64 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: s_branch .LBB5_6
; GCN-O0-NEXT: .LBB5_5: ; %Flow2
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
-; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: s_waitcnt expcnt(1)
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v4, 10
-; GCN-O0-NEXT: v_readlane_b32 s5, v4, 11
-; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
+; GCN-O0-NEXT: s_waitcnt expcnt(3)
; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(2)
; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
+; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
+; GCN-O0-NEXT: s_waitcnt vmcnt(0)
+; GCN-O0-NEXT: v_readlane_b32 s4, v6, 10
+; GCN-O0-NEXT: v_readlane_b32 s5, v6, 11
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_branch .LBB5_7
; GCN-O0-NEXT: .LBB5_6: ; %Flow
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
-; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: s_waitcnt expcnt(1)
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v4, 12
-; GCN-O0-NEXT: v_readlane_b32 s5, v4, 13
-; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
+; GCN-O0-NEXT: s_waitcnt expcnt(3)
; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(2)
; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(0)
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
+; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
+; GCN-O0-NEXT: s_waitcnt vmcnt(0)
+; GCN-O0-NEXT: v_readlane_b32 s4, v6, 12
+; GCN-O0-NEXT: v_readlane_b32 s5, v6, 13
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(3)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_branch .LBB5_5
; GCN-O0-NEXT: .LBB5_7: ; %bb10
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: s_waitcnt expcnt(3)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s6, v0, 8
-; GCN-O0-NEXT: v_readlane_b32 s7, v0, 9
+; GCN-O0-NEXT: v_readlane_b32 s6, v6, 8
+; GCN-O0-NEXT: v_readlane_b32 s7, v6, 9
; GCN-O0-NEXT: s_mov_b64 s[4:5], -1
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 14
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 15
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 14
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 15
; GCN-O0-NEXT: s_mov_b64 s[4:5], exec
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 16
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 17
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 16
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 17
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -1300,103 +1254,99 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_mov_b64 s[4:5], 0
; GCN-O0-NEXT: s_xor_b64 s[4:5], exec, -1
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_writelane_b32 v0, s4, 14
-; GCN-O0-NEXT: v_writelane_b32 v0, s5, 15
+; GCN-O0-NEXT: v_writelane_b32 v6, s4, 14
+; GCN-O0-NEXT: v_writelane_b32 v6, s5, 15
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: .LBB5_9: ; %Flow3
; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
-; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s8, v4, 16
-; GCN-O0-NEXT: v_readlane_b32 s9, v4, 17
-; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9]
-; GCN-O0-NEXT: v_readlane_b32 s6, v4, 4
-; GCN-O0-NEXT: v_readlane_b32 s7, v4, 5
-; GCN-O0-NEXT: v_readlane_b32 s4, v4, 14
-; GCN-O0-NEXT: v_readlane_b32 s5, v4, 15
-; GCN-O0-NEXT: s_waitcnt expcnt(0)
+; GCN-O0-NEXT: s_waitcnt expcnt(4)
; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(3)
; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(2)
; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(1)
; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
+; GCN-O0-NEXT: s_waitcnt expcnt(0)
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
+; GCN-O0-NEXT: s_waitcnt vmcnt(0)
+; GCN-O0-NEXT: v_readlane_b32 s8, v6, 16
+; GCN-O0-NEXT: v_readlane_b32 s9, v6, 17
+; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9]
+; GCN-O0-NEXT: v_readlane_b32 s6, v6, 4
+; GCN-O0-NEXT: v_readlane_b32 s7, v6, 5
+; GCN-O0-NEXT: v_readlane_b32 s4, v6, 14
+; GCN-O0-NEXT: v_readlane_b32 s5, v6, 15
; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5]
; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
; GCN-O0-NEXT: s_mov_b64 s[6:7], 0
; GCN-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
-; GCN-O0-NEXT: v_writelane_b32 v4, s8, 0
-; GCN-O0-NEXT: v_writelane_b32 v4, s9, 1
-; GCN-O0-NEXT: v_writelane_b32 v4, s6, 2
-; GCN-O0-NEXT: v_writelane_b32 v4, s7, 3
+; GCN-O0-NEXT: v_writelane_b32 v6, s8, 0
+; GCN-O0-NEXT: v_writelane_b32 v6, s9, 1
+; GCN-O0-NEXT: v_writelane_b32 v6, s6, 2
+; GCN-O0-NEXT: v_writelane_b32 v6, s7, 3
; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-O0-NEXT: v_writelane_b32 v4, s6, 18
-; GCN-O0-NEXT: v_writelane_b32 v4, s7, 19
+; GCN-O0-NEXT: v_writelane_b32 v6, s6, 18
+; GCN-O0-NEXT: v_writelane_b32 v6, s7, 19
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
-; GCN-O0-NEXT: s_waitcnt vmcnt(4)
; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(4)
; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(4)
; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GCN-O0-NEXT: s_waitcnt vmcnt(4)
; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1
; GCN-O0-NEXT: ; %bb.10: ; %bb12
; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: s_waitcnt expcnt(3)
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-O0-NEXT: s_waitcnt expcnt(4)
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v0, 18
-; GCN-O0-NEXT: v_readlane_b32 s5, v0, 19
+; GCN-O0-NEXT: v_readlane_b32 s4, v6, 18
+; GCN-O0-NEXT: v_readlane_b32 s5, v6, 19
; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-O0-NEXT: ; %bb.11: ; %bb12
-; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
+; GCN-O0-NEXT: s_waitcnt expcnt(3)
+; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt expcnt(2)
-; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt expcnt(1)
-; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt expcnt(0)
-; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_mov_b32_e32 v5, v4
+; GCN-O0-NEXT: v_mov_b32_e32 v4, v3
; GCN-O0-NEXT: ; implicit-def: $sgpr4
-; GCN-O0-NEXT: v_mov_b32_e32 v6, s4
-; GCN-O0-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen
+; GCN-O0-NEXT: v_mov_b32_e32 v5, s4
+; GCN-O0-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
-; GCN-O0-NEXT: v_mov_b32_e32 v5, v3
+; GCN-O0-NEXT: v_mov_b32_e32 v4, v2
; GCN-O0-NEXT: ; implicit-def: $sgpr4
-; GCN-O0-NEXT: v_mov_b32_e32 v6, s4
-; GCN-O0-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen
+; GCN-O0-NEXT: v_mov_b32_e32 v5, s4
+; GCN-O0-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
-; GCN-O0-NEXT: v_mov_b32_e32 v5, v2
+; GCN-O0-NEXT: v_mov_b32_e32 v4, v1
; GCN-O0-NEXT: ; implicit-def: $sgpr4
-; GCN-O0-NEXT: v_mov_b32_e32 v6, s4
-; GCN-O0-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen
+; GCN-O0-NEXT: v_mov_b32_e32 v5, s4
+; GCN-O0-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
; GCN-O0-NEXT: ; implicit-def: $sgpr4
-; GCN-O0-NEXT: v_mov_b32_e32 v2, s4
-; GCN-O0-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
+; GCN-O0-NEXT: v_mov_b32_e32 v1, s4
+; GCN-O0-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: ; kill: killed $vgpr0
; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[4:5]
; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-O0-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 789150f690d52e..7c09fec908f93e 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -46,6 +46,9 @@
; VMEM: [[ENDIF]]:
+; Restore val
+; VGPR: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
+
; Reload and restore exec mask
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
@@ -58,7 +61,7 @@
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
; Restore val
-; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]]
@@ -120,6 +123,7 @@ endif:
; GCN: buffer_store_dword v[[VAL_LOOP_RELOAD]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN: [[END]]:
+; VGPR: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
@@ -129,7 +133,8 @@ endif:
; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
-; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
+
+; VMEM: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]]
@@ -189,6 +194,7 @@ end:
; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]]
; GCN: [[FLOW]]: ; %Flow
+; VGPR: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
; VGPR: buffer_load_dword [[SPILL_VGPR:v[0-9]+]], off, s[0:3], 0 ; 4-byte Folded Reload
; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
@@ -200,7 +206,7 @@ end:
; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]]
-; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload
; Regular spill value restored after exec modification
; Followed by spill
@@ -234,6 +240,7 @@ end:
; GCN-NEXT: s_branch [[FLOW]]
; GCN: [[ENDIF]]:
+; VGPR: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]]
@@ -245,7 +252,7 @@ end:
; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]]
-; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]]
define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
index d5cdf584a75de9..a14d515688a8b8 100644
--- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
+++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
@@ -14,10 +14,10 @@ body: |
; CHECK-LABEL: name: def_csr_sgpr
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47, $vgpr0
+ ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
@@ -26,8 +26,6 @@ body: |
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: liveins: $vgpr0
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr42 = S_MOV_B32 0
; CHECK-NEXT: $sgpr43 = S_MOV_B32 1
; CHECK-NEXT: $sgpr46_sgpr47 = S_MOV_B64 2
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index b541be9f5aa444..6686742e449f5c 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -220,334 +220,327 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0: ; %bb.0: ; %_udiv-special-cases
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v20, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v0
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v10
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1
+; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1
; GFX9-O0-NEXT: s_mov_b32 s10, s6
-; GFX9-O0-NEXT: v_writelane_b32 v0, s10, 2
+; GFX9-O0-NEXT: v_writelane_b32 v30, s10, 2
; GFX9-O0-NEXT: s_mov_b32 s11, s7
-; GFX9-O0-NEXT: v_writelane_b32 v0, s11, 3
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, s10, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v1, v3, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v14, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v20, vcc
+; GFX9-O0-NEXT: v_writelane_b32 v30, s11, 3
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, s10, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v0, v2, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v0, v13, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v19, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
-; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[10:11], s[4:5]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
+; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
+; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[9:10], s[4:5]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v4
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v20, v1, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v14, v1, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v19, v0, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v13, v0, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v22
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v18, vcc, s10, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v8, v9, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v13, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v15, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v21
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v17, vcc, s10, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v7, v8, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v7, v12, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v14, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19
-; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[21:22], s[4:5]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, v9, v10, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5]
+; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
+; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[20:21], s[4:5]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v15, v8, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v13, v8, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v14, v7, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v12, v7, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9
-; GFX9-O0-NEXT: v_xor_b32_e64 v15, v15, v20
-; GFX9-O0-NEXT: v_xor_b32_e64 v13, v13, v14
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-O0-NEXT: v_xor_b32_e64 v14, v14, v19
+; GFX9-O0-NEXT: v_xor_b32_e64 v12, v12, v13
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14
; GFX9-O0-NEXT: s_mov_b32 s4, 63
-; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[13:14]
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_ashrrev_i64 v[12:13], s4, v[12:13]
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v19
-; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[13:14], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
-; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[13:14], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v18
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[11:12], s[8:9]
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9]
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
; GFX9-O0-NEXT: s_mov_b32 s13, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v8, v8, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
-; GFX9-O0-NEXT: v_min_u32_e64 v8, v8, v9
+; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
+; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8
; GFX9-O0-NEXT: s_mov_b32 s12, 0
; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v10
-; GFX9-O0-NEXT: v_min_u32_e64 v13, v7, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
+; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12
; GFX9-O0-NEXT: s_mov_b32 s16, s14
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
; GFX9-O0-NEXT: s_mov_b32 s18, s15
-; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[16:17], v10, s16
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s18
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[16:17], v7, v11, s[16:17]
-; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v12, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[8:9]
+; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17]
+; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[5:6], s[8:9]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9]
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr16
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT: v_min_u32_e64 v12, v5, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr13
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11
; GFX9-O0-NEXT: s_mov_b32 s12, s14
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
; GFX9-O0-NEXT: s_mov_b32 s14, s15
-; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[12:13], v11, s12
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s14
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v12, s[12:13]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[12:13], v10, s12
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, s14
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: s_mov_b32 s14, s13
-; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
+; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 4
-; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 5
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -555,11 +548,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB0_8
; GFX9-O0-NEXT: .LBB0_1: ; %Flow
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 6
-; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 7
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: ; %bb.2: ; %Flow
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
@@ -588,20 +581,19 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_5
; GFX9-O0-NEXT: .LBB0_3: ; %Flow2
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 4
-; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 5
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -648,13 +640,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_3
; GFX9-O0-NEXT: .LBB0_5: ; %Flow1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8
-; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
@@ -663,9 +648,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -679,92 +670,87 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB0_4
; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while
; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 10
-; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 11
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10
+; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11
; GFX9-O0-NEXT: s_mov_b32 s4, 63
-; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29
; GFX9-O0-NEXT: s_mov_b32 s5, 1
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3]
; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7]
; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30
-; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27
; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26
; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
-; GFX9-O0-NEXT: s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15
; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
@@ -784,22 +770,22 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
@@ -815,66 +801,66 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 6
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 7
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 10
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 11
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -912,52 +898,52 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
; GFX9-O0-NEXT: s_mov_b32 s6, 64
; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23
; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6
; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s6, 0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19]
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s7
@@ -976,12 +962,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4
@@ -993,7 +979,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -1006,10 +992,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8
; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7
; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 10
-; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 11
+; GFX9-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -1037,201 +1024,194 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_6
; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
; GFX9-O0-NEXT: s_mov_b32 s4, s7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s6
; GFX9-O0-NEXT: s_mov_b32 s9, s7
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4
-; GFX9-O0-NEXT: s_mov_b32 s10, 63
-; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5]
-; GFX9-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4
+; GFX9-O0-NEXT: s_mov_b32 s10, 63
+; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5]
+; GFX9-O0-NEXT: s_mov_b32 s10, 0
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11]
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 8
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 9
+; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5
; GFX9-O0-NEXT: s_branch .LBB0_7
; GFX9-O0-NEXT: .LBB0_9: ; %udiv-end
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7
; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
-; GFX9-O0-NEXT: v_xor_b32_e64 v9, v6, v5
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT: v_xor_b32_e64 v8, v5, v4
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v8
+; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v7
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v7, vcc, v7, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7
; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: s_mov_b32 s4, 32
-; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT: ; kill: killed $vgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
@@ -1444,258 +1424,252 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-G-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v1
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v6
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
; GFX9-G-O0-NEXT: s_mov_b64 s[12:13], 0x7f
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr3_vgpr4 killed $exec
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1
; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s6
-; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v12, v3, v8
-; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v11, v2, v7
+; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v1, v2
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr13_vgpr14 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v9, v0, v1
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v16
-; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1
; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s6
-; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v11, v3, v8
-; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec
-; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v2, v7
+; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
+; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
; GFX9-G-O0-NEXT: s_mov_b32 s6, 31
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v9, v1, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v0, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v12, v1
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v12, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v4
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v10, v3
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v1, s[6:7], v1, v12
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v12, s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v4, v10, s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[6:7], v3, v10, s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v11, v0
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v11, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v9, v3
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v9, v2
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[6:7], v0, v11
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[6:7], v1, v11, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v3, v9, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v9, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v16
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v11, v5
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v11, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v9, v7
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v9, v4
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v8, v11
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v5, v11, s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[6:7], v7, v9, s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v9, s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v13, v11, v12
-; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v11, v11, v12
-; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v11, v9, v10
-; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v9, v9, v10
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v8, v7
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v5, v4
-; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v1, v6
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v2, v3
-; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v10, v4
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v8, v6
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v8, v3
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[6:7], v7, v10
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v10, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v6, v8, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[6:7], v3, v8, s[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v12, v10, v11
+; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v10, v11
+; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v8, v9
+; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v8, v9
+; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v6
+; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v4, v3
+; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
+; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v0, v5
+; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v1, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32
-; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v8
-; GFX9-G-O0-NEXT: s_mov_b32 s10, 64
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
-; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9]
+; GFX9-G-O0-NEXT: s_mov_b32 s10, 64
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v3, v3
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
+; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT: v_min_u32_e64 v3, v3, v6
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
; GFX9-G-O0-NEXT: s_mov_b32 s16, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[9:10]
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v2
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[6:7], v[8:9]
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v1
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
+; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10
+; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v4, v6
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v2
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
-; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v5, v7
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v3
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32
-; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v8
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[8:9]
+; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[8:9]
; GFX9-G-O0-NEXT: s_mov_b32 s15, 0
; GFX9-G-O0-NEXT: s_mov_b32 s11, 0
; GFX9-G-O0-NEXT: s_mov_b32 s14, 0
; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[8:9], v4, v5
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[8:9], v3, v4
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s16
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s16
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s16
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[8:9], v4, v5, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s14
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s12
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s13
-; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[10:11], v[12:13]
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[8:9], v3, v4, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s14
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s13
+; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[12:13]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[6:7]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v10
-; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f
-; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v7, s7
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v4, s6
-; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v7, v9
-; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v8
-; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[7:8], v[9:10]
-; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v2, v4, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
-; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9
+; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f
+; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, s6
+; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v8
+; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v7
; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v3
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
+; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[6:7]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
+; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1
; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s4, 0
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s5, 1
+; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -1703,11 +1677,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_branch .LBB0_8
; GFX9-G-O0-NEXT: .LBB0_1: ; %Flow
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s4, v0, 2
-; GFX9-G-O0-NEXT: v_readlane_b32 s5, v0, 3
+; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2
+; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3
; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow
; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
@@ -1736,24 +1710,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB0_5
; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s4, v4, 0
-; GFX9-G-O0-NEXT: v_readlane_b32 s5, v4, 1
-; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0
+; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1
+; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT: s_nop 0
; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB0_9
; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit
@@ -1813,13 +1784,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB0_3
; GFX9-G-O0-NEXT: .LBB0_5: ; %Flow1
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s4, v8, 4
-; GFX9-G-O0-NEXT: v_readlane_b32 s5, v8, 5
-; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
@@ -1828,13 +1792,17 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4
+; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5
+; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT: s_nop 0
; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
@@ -1844,41 +1812,39 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_branch .LBB0_4
; GFX9-G-O0-NEXT: .LBB0_6: ; %udiv-do-while
; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s6, v16, 6
-; GFX9-G-O0-NEXT: v_readlane_b32 s7, v16, 7
-; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6
+; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
; GFX9-G-O0-NEXT: s_mov_b32 s8, 1
@@ -1897,9 +1863,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3
; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1
-; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr23_vgpr24 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v25
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v26
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr22_vgpr23 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v25
; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
; GFX9-G-O0-NEXT: s_mov_b32 s9, 31
@@ -1911,47 +1877,44 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v15
; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3
; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v23
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v24
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v25
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v26
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v23
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[27:28], v0, v[2:3]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[26:27], v0, v[2:3]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[14:15]
; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr2 killed $exec
; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
; GFX9-G-O0-NEXT: s_mov_b32 s8, 31
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v23, v2, v3
+; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v22, v2, v3
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8
; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30
; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v30, v32
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v32
; GFX9-G-O0-NEXT: v_mov_b32_e32 v25, v33
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v26, v34
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v29
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v30
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v27
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v28
-; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v26
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v27
+; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v23
; GFX9-G-O0-NEXT: v_or_b32_e64 v15, v1, v15
; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v25
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v26
-; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v23, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25
+; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v22, v23
; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v15
; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2
; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v15
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v13, s[8:9], v13, v4
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9]
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9]
@@ -1968,15 +1931,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_and_b32_e64 v14, v10, s8
; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, s4
; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v23
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23
; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v22
+; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v21
; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8
-; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v21
+; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20
; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
@@ -1985,60 +1948,60 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19
; GFX9-G-O0-NEXT: s_mov_b32 s8, -1
; GFX9-G-O0-NEXT: s_mov_b32 s12, -1
; GFX9-G-O0-NEXT: s_mov_b32 s11, -1
; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s8
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v17, s[8:9], v11, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v10, v11, s[8:9]
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v20, s[8:9], v9, v10, s[8:9]
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v8, v9, s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v20
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v19
-; GFX9-G-O0-NEXT: v_or_b32_e64 v17, v17, v20
-; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v18, v19
-; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[19:20]
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18
+; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19
+; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18
+; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v0
-; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0
+; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v12
-; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 2
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 3
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 6
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 7
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
@@ -2072,87 +2035,88 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v17
-; GFX9-G-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v4
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v16
+; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v19, v4
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v18, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v19
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v18
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v19, v6
+; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v18, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6
-; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v6
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v19, v[21:22]
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[26:27], v19, v[23:24]
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[24:25], v5, v[21:22]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v27
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24
+; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v6
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v18, v[20:21]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v18, v[22:23]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v25
-; GFX9-G-O0-NEXT: v_or_b32_e64 v20, v20, v23
-; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v5, v19
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v24
+; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v19, v22
+; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v5, v18
; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[21:22], v4, v[21:22]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v22
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v19, s[4:5]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v18, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v17, v5, v17, s[6:7]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v19, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v18, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v17, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v5, v16, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6
; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v17, v17, v18, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[4:5]
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v6
+; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6
; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v17
; GFX9-G-O0-NEXT: s_mov_b32 s4, -1
; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
; GFX9-G-O0-NEXT: s_mov_b32 s7, -1
; GFX9-G-O0-NEXT: s_mov_b32 s6, -1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s4
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[4:5], v16, v17
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s10
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v15, s[4:5], v15, v16, s[4:5]
-; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16
+; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
-; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6
+; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
-; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s6
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
+; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9]
-; GFX9-G-O0-NEXT: v_writelane_b32 v12, s8, 6
-; GFX9-G-O0-NEXT: v_writelane_b32 v12, s9, 7
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6
@@ -2180,165 +2144,157 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB0_6
; GFX9-G-O0-NEXT: .LBB0_8: ; %udiv-bb1
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
; GFX9-G-O0-NEXT: s_mov_b32 s6, 1
; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
; GFX9-G-O0-NEXT: s_mov_b32 s9, 0
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s6
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v3, v5
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v6, v8, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s9
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v7, v8, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s8
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v7, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v2, v4
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v7, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s9
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v6, v7, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v1, v6, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v7
-; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v6
+; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v9, s[6:7], v2, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v1, v2
; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
-; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v3, v9, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v15, v1, v9
+; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v8, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v14, v0, v8
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v9, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v9, v1
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[1:2], v9, v[13:14]
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[18:19], v15, v[13:14]
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[16:17], v9, v[11:12]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v8, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v8, v0
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v8, v[12:13]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v14, v[12:13]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v8, v[10:11]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17
-; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v12, v15
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v9, v11
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[13:14], v3, v[13:14]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v11, v14
+; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v8, v10
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[8:9]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[8:9]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v14
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v12, s[8:9]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[8:9]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v3
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[8:9]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[8:9]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8
-; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4
-; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[5:6], v[7:8]
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6
+; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
+; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec
; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s6, 4
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s7, 5
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(17)
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-G-O0-NEXT: s_cbranch_execz .LBB0_5
; GFX9-G-O0-NEXT: s_branch .LBB0_7
; GFX9-G-O0-NEXT: .LBB0_9: ; %udiv-end
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v10
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v12
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v0, v8
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v1, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v10
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v2, v6
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, v5
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[4:5], v0, v8
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[4:5], v1, v7, s[4:5]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v6, s[4:5]
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[4:5], v3, v5, s[4:5]
-; GFX9-G-O0-NEXT: ; kill: killed $vgpr4
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v0, v7
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v1, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v2, v5
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, v4
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[4:5], v0, v7
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[4:5], v1, v6, s[4:5]
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v5, s[4:5]
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[4:5], v3, v4, s[4:5]
; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
@@ -2533,246 +2489,238 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0: ; %bb.0: ; %_udiv-special-cases
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v3
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: v_or_b32_e64 v1, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v7, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT: s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: v_or_b32_e64 v15, v4, v2
+; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v9, v3, v1
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7]
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v3, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v8, v2, v0
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5
; GFX9-O0-NEXT: s_mov_b32 s9, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT: v_min_u32_e64 v6, v6, v7
+; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6
; GFX9-O0-NEXT: s_mov_b32 s8, 0
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
-; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
+; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
; GFX9-O0-NEXT: s_mov_b32 s12, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15
; GFX9-O0-NEXT: s_mov_b32 s14, s11
-; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[12:13], v8, s12
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s14
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13]
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v6, v7, s[12:13]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[12:13]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13]
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr9
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14
; GFX9-O0-NEXT: s_mov_b32 s8, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15
; GFX9-O0-NEXT: s_mov_b32 s10, s11
-; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[8:9], v11, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s10
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[8:9], v10, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
; GFX9-O0-NEXT: s_mov_b32 s10, s6
; GFX9-O0-NEXT: s_mov_b32 s11, s7
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: s_mov_b32 s14, s13
-; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
+; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 2
-; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 3
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 2
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 3
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -2780,11 +2728,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB1_8
; GFX9-O0-NEXT: .LBB1_1: ; %Flow
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 4
-; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 5
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: ; %bb.2: ; %Flow
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
@@ -2813,20 +2761,19 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB1_5
; GFX9-O0-NEXT: .LBB1_3: ; %Flow2
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 2
-; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 3
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2873,13 +2820,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB1_3
; GFX9-O0-NEXT: .LBB1_5: ; %Flow1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 6
-; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 7
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
@@ -2888,9 +2828,15 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2904,92 +2850,87 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB1_4
; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while
; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 8
-; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 9
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8
+; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9
; GFX9-O0-NEXT: s_mov_b32 s4, 63
-; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29
; GFX9-O0-NEXT: s_mov_b32 s5, 1
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3]
; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7]
; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30
-; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27
; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26
; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
-; GFX9-O0-NEXT: s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15
; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
@@ -3009,22 +2950,22 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
@@ -3040,66 +2981,66 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 4
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 5
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 4
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 5
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 8
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 9
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -3137,52 +3078,52 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
; GFX9-O0-NEXT: s_mov_b32 s6, 64
; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23
; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6
; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s6, 0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19]
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s7
@@ -3201,12 +3142,12 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4
@@ -3218,7 +3159,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -3231,10 +3172,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8
; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7
; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 8
-; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 9
+; GFX9-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -3262,165 +3204,158 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB1_6
; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
; GFX9-O0-NEXT: s_mov_b32 s4, s7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s6
; GFX9-O0-NEXT: s_mov_b32 s9, s7
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4
; GFX9-O0-NEXT: s_mov_b32 s10, 63
-; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11]
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 6
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 7
+; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB1_5
; GFX9-O0-NEXT: s_branch .LBB1_7
; GFX9-O0-NEXT: .LBB1_9: ; %udiv-end
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b32 s4, 32
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[7:8]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[5:6]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: ; kill: killed $vgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
@@ -3610,83 +3545,94 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-G-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v4
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v5
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v7
-; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7
+; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11
+; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10
+; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11
+; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10
+; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v10, v11
-; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11]
; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 32
+; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v6
+; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT: s_mov_b32 s10, 64
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10
+; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v4, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4
+; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
+; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9]
+; GFX9-G-O0-NEXT: s_mov_b32 s14, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
@@ -3697,7 +3643,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32
; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7
; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT: s_mov_b32 s10, 64
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10
; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v5, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
@@ -3708,130 +3653,106 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
-; GFX9-G-O0-NEXT: s_mov_b32 s14, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v4
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32
-; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT: v_min_u32_e64 v6, v6, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
-; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32
-; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT: v_min_u32_e64 v6, v6, v8
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9]
; GFX9-G-O0-NEXT: s_mov_b32 s13, 0
; GFX9-G-O0-NEXT: s_mov_b32 s11, 0
; GFX9-G-O0-NEXT: s_mov_b32 s12, 0
; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[8:9], v5, v6
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s14
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v5, v7, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s12
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10
-; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9]
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v5, s[8:9], v4, v5
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s14
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9]
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[12:13], 0x7f
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], v[14:15]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s12
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s13
-; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[10:11], v[12:13]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[11:12], v[13:14]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s13
+; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[9:10], v[11:12]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[12:13]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[6:7]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7]
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9
; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7
-; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v7, s6
-; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v9
-; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v8
-; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v5, s7
+; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s6
+; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7
+; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-G-O0-NEXT: v_and_b32_e32 v1, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-G-O0-NEXT: v_and_b32_e32 v0, 1, v4
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v3
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 1
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[6:7]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT: v_and_b32_e32 v2, 1, v4
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-G-O0-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1
; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s4, 0
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s5, 1
+; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -3839,68 +3760,65 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: s_branch .LBB1_8
; GFX9-G-O0-NEXT: .LBB1_1: ; %Flow
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s4, v0, 2
-; GFX9-G-O0-NEXT: v_readlane_b32 s5, v0, 3
+; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2
+; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3
; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB1_5
; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s4, v4, 0
-; GFX9-G-O0-NEXT: v_readlane_b32 s5, v4, 1
-; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0
+; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1
+; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB1_9
; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit
-; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5
@@ -3949,77 +3867,72 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB1_3
; GFX9-G-O0-NEXT: .LBB1_5: ; %Flow1
+; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s4, v8, 4
-; GFX9-G-O0-NEXT: v_readlane_b32 s5, v8, 5
+; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4
+; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5
; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB1_4
; GFX9-G-O0-NEXT: .LBB1_6: ; %udiv-do-while
; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_readlane_b32 s6, v16, 6
-; GFX9-G-O0-NEXT: v_readlane_b32 s7, v16, 7
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6
+; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16)
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5
; GFX9-G-O0-NEXT: s_mov_b32 s8, 1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[21:22], v2, v[0:1]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[20:21], v2, v[0:1]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4]
; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec
@@ -4043,8 +3956,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v3, v0, v1
; GFX9-G-O0-NEXT: s_mov_b32 s9, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v21
; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3
; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12
@@ -4052,7 +3965,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v0, v[2:3]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[22:23], v0, v[2:3]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[12:13]
; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr2 killed $exec
@@ -4064,22 +3977,20 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30
; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v30, v32
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v32
; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v33
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v34
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v29
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v30
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v23
; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v15
; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v1, v13
; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v21
; GFX9-G-O0-NEXT: v_or3_b32 v12, v12, v14, v15
; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v13
; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
@@ -4087,7 +3998,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v11, s[8:9], v11, v4
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9]
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9]
@@ -4109,18 +4019,18 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v11
; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v24
; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v25
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26
; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v27
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v28
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v23
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v24
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v23
; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v8, v11
; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, v10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v22
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v21
; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8
-; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v21
+; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20
; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9]
; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9]
@@ -4129,351 +4039,344 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9
; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19
; GFX9-G-O0-NEXT: s_mov_b32 s8, -1
; GFX9-G-O0-NEXT: s_mov_b32 s12, -1
; GFX9-G-O0-NEXT: s_mov_b32 s11, -1
; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s8
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v17, s[8:9], v11, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16
; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v10, v11, s[8:9]
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v20, s[8:9], v9, v10, s[8:9]
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v8, v9, s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v20
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v19
-; GFX9-G-O0-NEXT: v_or_b32_e64 v17, v17, v20
-; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v18, v19
-; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s4
-; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[19:20]
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18
+; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19
+; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18
+; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4
+; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19]
; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v2
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v0
-; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0
+; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v12
-; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 2
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 3
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 6
-; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 7
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB1_6
; GFX9-G-O0-NEXT: s_branch .LBB1_1
; GFX9-G-O0-NEXT: .LBB1_7: ; %udiv-preheader
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v4
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v6
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v13, v4
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v12, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v13
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v12
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v13, v6
+; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v12, v6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6
-; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v13, v6
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v13, v[21:22]
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[26:27], v13, v[15:16]
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[24:25], v5, v[21:22]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v26
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v27
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24
+; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v6
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v12, v[20:21]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v12, v[14:15]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v25
-; GFX9-G-O0-NEXT: v_or_b32_e64 v14, v14, v23
-; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v5, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v24
+; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v13, v22
+; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v5, v12
; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[21:22], v4, v[21:22]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v21
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v22
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[4:5]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v13, v5, v13, s[6:7]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v15
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v5, v12, s[6:7]
; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6
; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, 0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[4:5]
; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6
; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v17
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v20
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v19
; GFX9-G-O0-NEXT: s_mov_b32 s4, -1
; GFX9-G-O0-NEXT: s_mov_b32 s10, -1
; GFX9-G-O0-NEXT: s_mov_b32 s7, -1
; GFX9-G-O0-NEXT: s_mov_b32 s6, -1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s4
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[4:5], v16, v17
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s10
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v15, s[4:5], v15, v16, s[4:5]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16
; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5]
; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7
; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5]
; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s6
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5]
+; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9]
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9]
-; GFX9-G-O0-NEXT: v_writelane_b32 v12, s8, 6
-; GFX9-G-O0-NEXT: v_writelane_b32 v12, s9, 7
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7
; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s5
; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s4
-; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_branch .LBB1_6
; GFX9-G-O0-NEXT: .LBB1_8: ; %udiv-bb1
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0
; GFX9-G-O0-NEXT: s_mov_b32 s6, 1
; GFX9-G-O0-NEXT: s_mov_b32 s10, 0
; GFX9-G-O0-NEXT: s_mov_b32 s9, 0
; GFX9-G-O0-NEXT: s_mov_b32 s8, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s6
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v2, v5
-; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10
-; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v4, v6, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s9
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v3, v4, s[6:7]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v1, v3, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v1, v4
+; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s9
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8
+; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v7
-; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6
+; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[6:7], v1, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v3, s[6:7], v0, v1
; GFX9-G-O0-NEXT: s_mov_b32 s7, 64
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10
; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v3, v4, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_sub_u32_e64 v9, v1, v4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v3, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_sub_u32_e64 v8, v0, v3
; GFX9-G-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v4, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, v1
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[1:2], v4, v[13:14]
-; GFX9-G-O0-NEXT: v_lshrrev_b64 v[18:19], v9, v[13:14]
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[16:17], v4, v[11:12]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v19
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7
+; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v3, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v0
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v3, v[12:13]
+; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v8, v[12:13]
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v3, v[10:11]
; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17
-; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v10, v15
-; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9
-; GFX9-G-O0-NEXT: v_lshlrev_b64 v[13:14], v3, v[13:14]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v18
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v14
+; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v8
+; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0
; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[8:9]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[8:9]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v13
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v14
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[8:9]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v12
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[6:7]
-; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7]
-; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v3
-; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[8:9]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v11
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7]
+; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
-; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8
-; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4
-; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[5:6], v[7:8]
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6
+; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4
+; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7]
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s8
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s9
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s10
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11
-; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11
+; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_nop 0
+; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec
; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s6, 4
-; GFX9-G-O0-NEXT: v_writelane_b32 v0, s7, 5
+; GFX9-G-O0-NEXT: s_waitcnt vmcnt(17)
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4
+; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5
; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-G-O0-NEXT: s_cbranch_execz .LBB1_5
; GFX9-G-O0-NEXT: s_branch .LBB1_7
; GFX9-G-O0-NEXT: .LBB1_9: ; %udiv-end
-; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v3
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v4
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8
-; GFX9-G-O0-NEXT: ; kill: killed $vgpr4
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: s_nop 0
-; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir
index 2695fdbda87556..a680b63a34b9a9 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir
@@ -294,7 +294,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; MUBUFW32: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUFW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec
@@ -302,7 +302,7 @@ body: |
; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; FLATSCRW32: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; FLATSCRW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
; FLATSCRW32-NEXT: {{ $}}
; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index 6ec296144bf193..fa442aa849d17e 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -1192,7 +1192,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1204,7 +1204,7 @@ body: |
; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1216,7 +1216,7 @@ body: |
; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1227,7 +1227,7 @@ body: |
; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1238,7 +1238,7 @@ body: |
; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1249,7 +1249,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX940: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX940-NEXT: {{ $}}
; GFX940-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
; GFX940-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
@@ -1260,7 +1260,7 @@ body: |
; GFX940-NEXT: SI_RETURN implicit $vgpr0
;
; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX11: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX11: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec
@@ -1268,7 +1268,7 @@ body: |
; GFX11-NEXT: SI_RETURN implicit $vgpr0
;
; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX12: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec
@@ -1296,7 +1296,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
; GFX7-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1308,7 +1308,7 @@ body: |
; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1320,7 +1320,7 @@ body: |
; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1331,7 +1331,7 @@ body: |
; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1342,7 +1342,7 @@ body: |
; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
- ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1353,7 +1353,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
- ; FLATSCRW64: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
@@ -1384,7 +1384,7 @@ body: |
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8
; GFX7-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
- ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX7-NEXT: {{ $}}
; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1396,7 +1396,7 @@ body: |
; GFX7-NEXT: SI_RETURN implicit $vgpr0
;
; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
- ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1408,7 +1408,7 @@ body: |
; GFX8-NEXT: SI_RETURN implicit $vgpr0
;
; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
- ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX900-NEXT: {{ $}}
; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1419,7 +1419,7 @@ body: |
; GFX900-NEXT: SI_RETURN implicit $vgpr0
;
; GFX90A-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
- ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1430,7 +1430,7 @@ body: |
; GFX90A-NEXT: SI_RETURN implicit $vgpr0
;
; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
- ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
@@ -1441,7 +1441,7 @@ body: |
; GFX10-NEXT: SI_RETURN implicit $vgpr0
;
; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
- ; FLATSCRW64: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
; FLATSCRW64-NEXT: {{ $}}
; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
index 3bf7e7b8c56960..2f43c8264bf90a 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
@@ -25,13 +25,12 @@ body: |
; GCN-LABEL: name: test_single_block
; GCN: liveins: $sgpr4, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
- ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
- ; GCN-NEXT: KILL killed renamable $vgpr0
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+ ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; GCN-NEXT: SI_RETURN
SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
@@ -63,32 +62,31 @@ body: |
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: S_BRANCH %bb.1
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.3(0x80000000)
- ; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr63
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
+ ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.3:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
+ ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: KILL killed renamable $vgpr0
; GCN-NEXT: SI_RETURN
bb.0:
liveins: $sgpr6, $sgpr10_sgpr11
@@ -135,52 +133,50 @@ body: |
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.3(0x80000000)
- ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr1, $sgpr10_sgpr11
+ ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0
- ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec
+ ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 5, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.3:
; GCN-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000)
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec
+ ; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, [[V_MOV_B32_e32_1]], implicit $exec
; GCN-NEXT: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
; GCN-NEXT: S_CBRANCH_SCC1 %bb.5, implicit $scc
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.4:
; GCN-NEXT: successors: %bb.3(0x80000000)
- ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr6_sgpr7
+ ; GCN-NEXT: liveins: $sgpr6_sgpr7
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 1, killed $vgpr1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+ ; GCN-NEXT: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, [[V_MOV_B32_e32_1]], implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_SUB_U32_e32_]], implicit $exec
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.5:
- ; GCN-NEXT: liveins: $vgpr0, $sgpr6_sgpr7
+ ; GCN-NEXT: liveins: $sgpr6_sgpr7
; GCN-NEXT: {{ $}}
; GCN-NEXT: $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc
- ; GCN-NEXT: KILL killed renamable $vgpr0
; GCN-NEXT: SI_RETURN
bb.0:
liveins: $sgpr4, $sgpr10_sgpr11
@@ -239,26 +235,24 @@ body: |
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
- ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
+ ; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
+ ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63
; GCN-NEXT: S_NOP 0
- ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
- ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
- ; GCN-NEXT: KILL killed renamable $vgpr0
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
+ ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; GCN-NEXT: SI_RETURN
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
- ; GCN-NEXT: liveins: $vgpr0, $vgpr2_vgpr3
+ ; GCN-NEXT: liveins: $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
- ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
- ; GCN-NEXT: KILL killed renamable $vgpr0
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
+ ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; GCN-NEXT: SI_RETURN
bb.0:
liveins: $sgpr4, $vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
index 431b7d5400f430..798cd6239d2621 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll
@@ -108,255 +108,114 @@ define amdgpu_kernel void @kernel_calls_no_stack() {
}
define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
-; FLAT_SCR_OPT-LABEL: test:
-; FLAT_SCR_OPT: ; %bb.0:
-; FLAT_SCR_OPT-NEXT: s_add_u32 s6, s6, s11
-; FLAT_SCR_OPT-NEXT: s_addc_u32 s7, s7, 0
-; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6
-; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7
-; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0)
-; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s0, 0
-; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s1, 1
-; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1
-; FLAT_SCR_OPT-NEXT: s_mov_b32 s0, 0
-; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
-; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3
-; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105
-; FLAT_SCR_OPT-NEXT: s_load_dword vcc_lo, s[2:3], 0x8
-; FLAT_SCR_OPT-NEXT: ; kill: killed $sgpr2_sgpr3
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0)
-; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v0, vcc_lo
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: ;;#ASMSTART
-; FLAT_SCR_OPT-NEXT: ;;#ASMEND
-; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1
-; FLAT_SCR_OPT-NEXT: s_mov_b32 s0, 0
-; FLAT_SCR_OPT-NEXT: scratch_load_dword v1, off, s0 ; 4-byte Folded Reload
-; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3
-; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105
-; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0)
-; FLAT_SCR_OPT-NEXT: v_readlane_b32 s0, v1, 0
-; FLAT_SCR_OPT-NEXT: v_readlane_b32 s1, v1, 1
-; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1
-; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105
-; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v2, 0
-; FLAT_SCR_OPT-NEXT: ; kill: killed $vgpr1
-; FLAT_SCR_OPT-NEXT: global_store_dword v2, v0, s[0:1]
-; FLAT_SCR_OPT-NEXT: s_endpgm
-;
-; FLAT_SCR_ARCH-LABEL: test:
-; FLAT_SCR_ARCH: ; %bb.0:
-; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
-; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0)
-; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s0, 0
-; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s1, 1
-; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 s0, 0
-; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
-; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105
-; FLAT_SCR_ARCH-NEXT: s_load_dword vcc_lo, s[2:3], 0x8
-; FLAT_SCR_ARCH-NEXT: ; kill: killed $sgpr2_sgpr3
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0)
-; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v0, vcc_lo
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART
-; FLAT_SCR_ARCH-NEXT: ;;#ASMEND
-; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 s0, 0
-; FLAT_SCR_ARCH-NEXT: scratch_load_dword v1, off, s0 ; 4-byte Folded Reload
-; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105
-; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0)
-; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s0, v1, 0
-; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s1, v1, 1
-; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105
-; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v2, 0
-; FLAT_SCR_ARCH-NEXT: ; kill: killed $vgpr1
-; FLAT_SCR_ARCH-NEXT: global_store_dword v2, v0, s[0:1]
-; FLAT_SCR_ARCH-NEXT: s_endpgm
+; GCN-LABEL: test:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_clause 0x1
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
+; GCN-NEXT: s_load_dword vcc_lo, s[2:3], 0x8
+; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; kill: killed $sgpr2_sgpr3
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_writelane_b32 v0, s0, 0
+; GCN-NEXT: v_writelane_b32 v0, s1, 1
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: v_readlane_b32 s0, v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo
+; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ;;#ASMSTART
+; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: global_store_dword v2, v1, s[0:1]
+; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{s[0:7]}" ()
call void asm sideeffect "", "~{s[8:15]}" ()
call void asm sideeffect "", "~{s[16:23]}" ()
@@ -371,7 +230,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
call void asm sideeffect "", "~{s[88:95]}" ()
call void asm sideeffect "", "~{s[96:103]}" ()
call void asm sideeffect "", "~{s[104:105]}" ()
- call void asm sideeffect "", "~{v[0:7]}" ()
+ call void asm sideeffect "", "~{v[1:7]}" ()
call void asm sideeffect "", "~{v[8:15]}" ()
call void asm sideeffect "", "~{v[16:23]}" ()
call void asm sideeffect "", "~{v[24:31]}" ()
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
index ba619a659f1b07..5f36d5403ebcfc 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir
@@ -12,13 +12,13 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_lo
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def $exec_lo
+ ; CHECK: S_NOP 0, implicit-def $exec_lo
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec_lo
@@ -37,13 +37,13 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def $exec_hi
+ ; CHECK: S_NOP 0, implicit-def $exec_hi
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec_hi
@@ -62,16 +62,16 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def $exec
+ ; CHECK: S_NOP 0, implicit-def $exec
; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
- ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
+ ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def $exec
@@ -93,12 +93,12 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+ ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo
@@ -116,12 +116,12 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+ ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi
@@ -139,15 +139,15 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1
+ ; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1
- ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1
+ ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec
diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
index 1c7896fcb4f141..1c2436bd6b6cd3 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir
@@ -13,13 +13,13 @@ body: |
bb.0:
; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
- ; CHECK-NEXT: S_NOP 0, implicit-def $m0
+ ; CHECK: S_NOP 0, implicit-def $m0
; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+ ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
@@ -43,12 +43,12 @@ body: |
bb.0:
; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0
- ; CHECK: renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0
+ ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0
; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0
- ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0
+ ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0
; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index f388aeb0470291..0309a156171d7d 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -166,7 +166,7 @@ body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr10
; GCN-LABEL: name: func_add_constant_to_fi_uniform_live_SCC_i32
- ; GCN: liveins: $sgpr10, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr30_sgpr31, $sgpr10
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc
; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 3b078c41f4a849..7d07641f455e3f 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -2635,7 +2635,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX9-NEXT: s_add_i32 s33, s32, 0x7fc0
; GFX9-NEXT: s_and_b32 s33, s33, 0xffff8000
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_add_i32 s32, s32, 0x28000
; GFX9-NEXT: v_mov_b32_e32 v0, 0
@@ -2775,25 +2775,25 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:796
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:516
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:524
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:528
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:540
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
+; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:544
; GFX9-NEXT: s_nop 0
; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:548
@@ -2861,13 +2861,13 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX9-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:152
; GFX9-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:156
; GFX9-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:160
-; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload
-; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload
; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33
; GFX9-NEXT: v_add_u32_e32 v0, 0x400, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 42
@@ -2890,7 +2890,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX9-NEXT: v_readlane_b32 s31, v63, 1
; GFX9-NEXT: v_readlane_b32 s30, v63, 0
; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX9-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
+; GFX9-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_add_i32 s32, s32, 0xfffd8000
; GFX9-NEXT: s_mov_b32 s33, s36
@@ -2904,7 +2904,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX10-NEXT: s_add_i32 s33, s32, 0x3fe0
; GFX10-NEXT: s_and_b32 s33, s33, 0xffffc000
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
-; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_mov_b32_e32 v0, 0
@@ -3046,28 +3046,28 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX10-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:796
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:516
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:520
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:524
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:528
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:532
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:536
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:540
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:544
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill
+; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill
; GFX10-NEXT: s_clause 0x15
; GFX10-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:548
; GFX10-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:552
@@ -3134,14 +3134,14 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX10-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:156
; GFX10-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:160
; GFX10-NEXT: s_clause 0x7
-; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1540
-; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1544
-; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1548
-; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1552
-; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1556
-; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1560
-; GFX10-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1564
-; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:1568
+; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1536
+; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1540
+; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1544
+; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1548
+; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1552
+; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1556
+; GFX10-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1560
+; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:1564
; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33
; GFX10-NEXT: v_mov_b32_e32 v1, 42
; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x400, v0
@@ -3165,7 +3165,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX10-NEXT: v_readlane_b32 s31, v63, 1
; GFX10-NEXT: v_readlane_b32 s30, v63, 0
; GFX10-NEXT: s_or_saveexec_b32 s34, -1
-; GFX10-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload
+; GFX10-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_add_i32 s32, s32, 0xfffec000
@@ -3181,7 +3181,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1536 ; 4-byte Folded Spill
+; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: v_mov_b32_e32 v4, 0
@@ -3267,7 +3267,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1588 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill
; GFX11-NEXT: s_clause 0x3
; GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:528
; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:544
@@ -3277,13 +3277,13 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: v_mov_b32_e32 v10, v21
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1572 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1568 ; 16-byte Folded Spill
; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:592
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1556 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1552 ; 16-byte Folded Spill
; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:608
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1540 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill
; GFX11-NEXT: scratch_store_b128 off, v[32:35], s32
; GFX11-NEXT: v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v32, v36
; GFX11-NEXT: v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49
@@ -3333,13 +3333,13 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2
; GFX11-NEXT: s_add_i32 s2, s32, 16
; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2
-; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload
+; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1584 ; 16-byte Folded Reload
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v1, 42
; GFX11-NEXT: s_clause 0x2
-; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572
-; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556
-; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540
+; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1568
+; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1552
+; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1536
; GFX11-NEXT: s_add_i32 s2, s33, 0x400
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v0, s2
@@ -3360,7 +3360,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: v_readlane_b32 s31, v60, 1
; GFX11-NEXT: v_readlane_b32 s30, v60, 0
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload
+; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1600 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_addk_i32 s32, 0xf600
; GFX11-NEXT: s_mov_b32 s33, s34
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
index 742498cdd8bd14..c76a84cb1c5d4c 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir
@@ -21,14 +21,10 @@ body: |
; CHECK-LABEL: name: split_instruction_subranges
; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
- ; CHECK-NEXT: S_NOP 0, implicit [[COPY]].sub1
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub0
- ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
- ; CHECK-NEXT: S_NOP 0, implicit [[COPY1]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0
+ ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
; CHECK-NEXT: S_ENDPGM 0
%1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
%2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
@@ -61,23 +57,13 @@ body: |
; CHECK-LABEL: name: split_instruction_subranges_use_is_subreg_def
; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1)
- ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0
- ; CHECK-NEXT: S_NOP 0, implicit-def [[COPY]].sub1
- ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]].sub0
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1
- ; CHECK-NEXT: S_NOP 0, implicit-def [[COPY2]].sub0
- ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub1:vreg_64 = COPY [[COPY2]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub0
; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1
- ; CHECK-NEXT: undef [[COPY4:%[0-9]+]].sub0:vreg_64 = COPY [[COPY1]].sub0
- ; CHECK-NEXT: S_NOP 0, implicit [[COPY4]].sub0
- ; CHECK-NEXT: undef [[COPY5:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]].sub1
- ; CHECK-NEXT: S_NOP 0, implicit [[COPY5]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0
+ ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1
; CHECK-NEXT: S_ENDPGM 0
%1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1)
%2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 5abd4c9069c919..a4a8f43646d4ba 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -6,209 +6,209 @@ define void @main(i1 %arg) #0 {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
-; CHECK-NEXT: v_writelane_b32 v8, s30, 0
-; CHECK-NEXT: v_writelane_b32 v8, s31, 1
-; CHECK-NEXT: v_writelane_b32 v8, s36, 2
-; CHECK-NEXT: v_writelane_b32 v8, s37, 3
-; CHECK-NEXT: v_writelane_b32 v8, s38, 4
-; CHECK-NEXT: v_writelane_b32 v8, s39, 5
-; CHECK-NEXT: v_writelane_b32 v8, s40, 6
-; CHECK-NEXT: v_writelane_b32 v8, s41, 7
-; CHECK-NEXT: v_writelane_b32 v8, s42, 8
-; CHECK-NEXT: v_writelane_b32 v8, s43, 9
-; CHECK-NEXT: v_writelane_b32 v8, s44, 10
-; CHECK-NEXT: v_writelane_b32 v8, s45, 11
-; CHECK-NEXT: v_writelane_b32 v8, s46, 12
-; CHECK-NEXT: v_writelane_b32 v8, s47, 13
-; CHECK-NEXT: v_writelane_b32 v8, s48, 14
-; CHECK-NEXT: v_writelane_b32 v8, s49, 15
+; CHECK-NEXT: v_writelane_b32 v5, s30, 0
+; CHECK-NEXT: v_writelane_b32 v5, s31, 1
+; CHECK-NEXT: v_writelane_b32 v5, s36, 2
+; CHECK-NEXT: v_writelane_b32 v5, s37, 3
+; CHECK-NEXT: v_writelane_b32 v5, s38, 4
+; CHECK-NEXT: v_writelane_b32 v5, s39, 5
+; CHECK-NEXT: v_writelane_b32 v5, s40, 6
+; CHECK-NEXT: v_writelane_b32 v5, s41, 7
+; CHECK-NEXT: v_writelane_b32 v5, s42, 8
+; CHECK-NEXT: v_writelane_b32 v5, s43, 9
+; CHECK-NEXT: v_writelane_b32 v5, s44, 10
+; CHECK-NEXT: v_writelane_b32 v5, s45, 11
+; CHECK-NEXT: v_writelane_b32 v5, s46, 12
+; CHECK-NEXT: v_writelane_b32 v5, s47, 13
+; CHECK-NEXT: v_writelane_b32 v5, s48, 14
+; CHECK-NEXT: v_writelane_b32 v5, s49, 15
; CHECK-NEXT: s_getpc_b64 s[24:25]
-; CHECK-NEXT: v_writelane_b32 v8, s50, 16
+; CHECK-NEXT: v_writelane_b32 v5, s50, 16
; CHECK-NEXT: s_movk_i32 s4, 0xf0
; CHECK-NEXT: s_mov_b32 s5, s24
-; CHECK-NEXT: v_writelane_b32 v8, s51, 17
+; CHECK-NEXT: v_writelane_b32 v5, s51, 17
; CHECK-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0
-; CHECK-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; CHECK-NEXT: s_mov_b64 s[4:5], 0
; CHECK-NEXT: s_load_dwordx4 s[28:31], s[4:5], 0x0
; CHECK-NEXT: s_movk_i32 s20, 0x130
; CHECK-NEXT: s_mov_b32 s21, s24
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_writelane_b32 v4, s36, 0
-; CHECK-NEXT: v_writelane_b32 v4, s37, 1
-; CHECK-NEXT: v_writelane_b32 v4, s38, 2
-; CHECK-NEXT: v_writelane_b32 v4, s39, 3
-; CHECK-NEXT: v_writelane_b32 v4, s40, 4
-; CHECK-NEXT: v_writelane_b32 v4, s41, 5
-; CHECK-NEXT: v_writelane_b32 v4, s42, 6
-; CHECK-NEXT: v_writelane_b32 v4, s43, 7
-; CHECK-NEXT: v_writelane_b32 v4, s44, 8
-; CHECK-NEXT: v_writelane_b32 v4, s45, 9
-; CHECK-NEXT: v_writelane_b32 v4, s46, 10
+; CHECK-NEXT: v_writelane_b32 v7, s36, 0
+; CHECK-NEXT: v_writelane_b32 v7, s37, 1
+; CHECK-NEXT: v_writelane_b32 v7, s38, 2
+; CHECK-NEXT: v_writelane_b32 v7, s39, 3
+; CHECK-NEXT: v_writelane_b32 v7, s40, 4
+; CHECK-NEXT: v_writelane_b32 v7, s41, 5
+; CHECK-NEXT: v_writelane_b32 v7, s42, 6
+; CHECK-NEXT: v_writelane_b32 v7, s43, 7
+; CHECK-NEXT: v_writelane_b32 v7, s44, 8
+; CHECK-NEXT: v_writelane_b32 v7, s45, 9
+; CHECK-NEXT: v_writelane_b32 v7, s46, 10
; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0
-; CHECK-NEXT: v_writelane_b32 v4, s47, 11
-; CHECK-NEXT: v_writelane_b32 v4, s48, 12
-; CHECK-NEXT: v_writelane_b32 v4, s49, 13
+; CHECK-NEXT: v_writelane_b32 v7, s47, 11
+; CHECK-NEXT: v_writelane_b32 v7, s48, 12
; CHECK-NEXT: s_mov_b32 s20, 0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: v_writelane_b32 v4, s50, 14
-; CHECK-NEXT: v_mov_b32_e32 v5, s28
-; CHECK-NEXT: v_mov_b32_e32 v6, v1
+; CHECK-NEXT: v_writelane_b32 v7, s49, 13
+; CHECK-NEXT: v_mov_b32_e32 v2, s28
+; CHECK-NEXT: v_mov_b32_e32 v3, v1
; CHECK-NEXT: s_mov_b32 s21, s20
; CHECK-NEXT: s_mov_b32 s22, s20
; CHECK-NEXT: s_mov_b32 s23, s20
-; CHECK-NEXT: v_writelane_b32 v4, s51, 15
+; CHECK-NEXT: v_writelane_b32 v7, s50, 14
+; CHECK-NEXT: v_writelane_b32 v7, s51, 15
+; CHECK-NEXT: image_sample_lz v3, v[2:3], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v2, v1
-; CHECK-NEXT: image_sample_lz v5, v[5:6], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_writelane_b32 v4, s4, 16
-; CHECK-NEXT: v_writelane_b32 v4, s5, 17
-; CHECK-NEXT: v_writelane_b32 v4, s6, 18
-; CHECK-NEXT: v_writelane_b32 v4, s7, 19
-; CHECK-NEXT: v_writelane_b32 v4, s8, 20
-; CHECK-NEXT: v_writelane_b32 v4, s9, 21
-; CHECK-NEXT: image_sample_lz v6, v[1:2], s[4:11], s[20:23] dmask:0x1
-; CHECK-NEXT: v_writelane_b32 v4, s10, 22
-; CHECK-NEXT: v_writelane_b32 v4, s11, 23
-; CHECK-NEXT: v_writelane_b32 v4, s12, 24
-; CHECK-NEXT: v_writelane_b32 v4, s13, 25
-; CHECK-NEXT: v_writelane_b32 v4, s14, 26
-; CHECK-NEXT: v_writelane_b32 v4, s15, 27
-; CHECK-NEXT: v_writelane_b32 v8, s52, 18
-; CHECK-NEXT: v_writelane_b32 v4, s16, 28
-; CHECK-NEXT: v_writelane_b32 v8, s53, 19
-; CHECK-NEXT: v_writelane_b32 v4, s17, 29
-; CHECK-NEXT: v_writelane_b32 v8, s54, 20
-; CHECK-NEXT: v_writelane_b32 v4, s18, 30
+; CHECK-NEXT: v_writelane_b32 v7, s4, 16
+; CHECK-NEXT: v_writelane_b32 v7, s5, 17
+; CHECK-NEXT: v_writelane_b32 v7, s6, 18
+; CHECK-NEXT: v_writelane_b32 v7, s7, 19
+; CHECK-NEXT: v_writelane_b32 v7, s8, 20
+; CHECK-NEXT: v_writelane_b32 v7, s9, 21
+; CHECK-NEXT: image_sample_lz v4, v[1:2], s[4:11], s[20:23] dmask:0x1
+; CHECK-NEXT: v_writelane_b32 v7, s10, 22
+; CHECK-NEXT: v_writelane_b32 v7, s11, 23
+; CHECK-NEXT: v_writelane_b32 v7, s12, 24
+; CHECK-NEXT: v_writelane_b32 v7, s13, 25
+; CHECK-NEXT: v_writelane_b32 v7, s14, 26
+; CHECK-NEXT: v_writelane_b32 v7, s15, 27
+; CHECK-NEXT: v_writelane_b32 v5, s52, 18
+; CHECK-NEXT: v_writelane_b32 v7, s16, 28
+; CHECK-NEXT: v_writelane_b32 v5, s53, 19
+; CHECK-NEXT: v_writelane_b32 v7, s17, 29
+; CHECK-NEXT: v_writelane_b32 v5, s54, 20
+; CHECK-NEXT: v_writelane_b32 v7, s18, 30
; CHECK-NEXT: s_mov_b32 s26, 48
; CHECK-NEXT: s_mov_b32 s27, s24
-; CHECK-NEXT: v_writelane_b32 v8, s55, 21
-; CHECK-NEXT: v_writelane_b32 v4, s19, 31
+; CHECK-NEXT: v_writelane_b32 v5, s55, 21
+; CHECK-NEXT: v_writelane_b32 v7, s19, 31
; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0
-; CHECK-NEXT: v_writelane_b32 v8, s56, 22
-; CHECK-NEXT: v_writelane_b32 v8, s57, 23
-; CHECK-NEXT: v_writelane_b32 v8, s58, 24
-; CHECK-NEXT: v_writelane_b32 v8, s59, 25
-; CHECK-NEXT: v_writelane_b32 v8, s60, 26
+; CHECK-NEXT: v_writelane_b32 v5, s56, 22
+; CHECK-NEXT: v_writelane_b32 v5, s57, 23
+; CHECK-NEXT: v_writelane_b32 v5, s58, 24
+; CHECK-NEXT: v_writelane_b32 v5, s59, 25
+; CHECK-NEXT: v_writelane_b32 v5, s60, 26
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_writelane_b32 v4, s4, 32
-; CHECK-NEXT: v_writelane_b32 v8, s61, 27
-; CHECK-NEXT: v_writelane_b32 v4, s5, 33
-; CHECK-NEXT: v_writelane_b32 v8, s62, 28
-; CHECK-NEXT: v_writelane_b32 v4, s6, 34
-; CHECK-NEXT: v_writelane_b32 v8, s63, 29
-; CHECK-NEXT: v_writelane_b32 v4, s7, 35
-; CHECK-NEXT: v_writelane_b32 v8, s64, 30
-; CHECK-NEXT: v_writelane_b32 v4, s8, 36
-; CHECK-NEXT: v_writelane_b32 v8, s65, 31
-; CHECK-NEXT: v_writelane_b32 v4, s9, 37
-; CHECK-NEXT: v_writelane_b32 v8, s66, 32
+; CHECK-NEXT: v_writelane_b32 v7, s4, 32
+; CHECK-NEXT: v_writelane_b32 v5, s61, 27
+; CHECK-NEXT: v_writelane_b32 v7, s5, 33
+; CHECK-NEXT: v_writelane_b32 v5, s62, 28
+; CHECK-NEXT: v_writelane_b32 v7, s6, 34
+; CHECK-NEXT: v_writelane_b32 v5, s63, 29
+; CHECK-NEXT: v_writelane_b32 v7, s7, 35
+; CHECK-NEXT: v_writelane_b32 v5, s64, 30
+; CHECK-NEXT: v_writelane_b32 v7, s8, 36
+; CHECK-NEXT: v_writelane_b32 v5, s65, 31
+; CHECK-NEXT: v_writelane_b32 v7, s9, 37
+; CHECK-NEXT: v_writelane_b32 v5, s66, 32
; CHECK-NEXT: s_movk_i32 s28, 0x1f0
; CHECK-NEXT: s_movk_i32 s30, 0x2f0
; CHECK-NEXT: s_mov_b32 s29, s24
; CHECK-NEXT: s_mov_b32 s31, s24
-; CHECK-NEXT: v_writelane_b32 v4, s10, 38
-; CHECK-NEXT: v_writelane_b32 v8, s67, 33
-; CHECK-NEXT: v_writelane_b32 v4, s11, 39
+; CHECK-NEXT: v_writelane_b32 v7, s10, 38
+; CHECK-NEXT: v_writelane_b32 v5, s67, 33
+; CHECK-NEXT: v_writelane_b32 v7, s11, 39
; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0
; CHECK-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1
-; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_mul_f32_e32 v0, v6, v5
+; CHECK-NEXT: v_mul_f32_e32 v0, v4, v3
; CHECK-NEXT: s_and_saveexec_b64 s[26:27], s[24:25]
; CHECK-NEXT: s_xor_b64 s[26:27], exec, s[26:27]
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: ; %bb.1: ; %bb48
-; CHECK-NEXT: v_readlane_b32 s36, v4, 0
-; CHECK-NEXT: v_readlane_b32 s44, v4, 8
-; CHECK-NEXT: v_readlane_b32 s45, v4, 9
-; CHECK-NEXT: v_readlane_b32 s46, v4, 10
-; CHECK-NEXT: v_readlane_b32 s47, v4, 11
-; CHECK-NEXT: v_readlane_b32 s48, v4, 12
-; CHECK-NEXT: v_readlane_b32 s49, v4, 13
-; CHECK-NEXT: v_readlane_b32 s50, v4, 14
-; CHECK-NEXT: v_readlane_b32 s51, v4, 15
+; CHECK-NEXT: v_readlane_b32 s36, v7, 0
+; CHECK-NEXT: v_readlane_b32 s44, v7, 8
+; CHECK-NEXT: v_readlane_b32 s45, v7, 9
+; CHECK-NEXT: v_readlane_b32 s46, v7, 10
+; CHECK-NEXT: v_readlane_b32 s47, v7, 11
+; CHECK-NEXT: v_readlane_b32 s48, v7, 12
+; CHECK-NEXT: v_readlane_b32 s49, v7, 13
+; CHECK-NEXT: v_readlane_b32 s50, v7, 14
+; CHECK-NEXT: v_readlane_b32 s51, v7, 15
; CHECK-NEXT: s_and_b64 vcc, exec, -1
-; CHECK-NEXT: v_readlane_b32 s37, v4, 1
-; CHECK-NEXT: v_readlane_b32 s38, v4, 2
-; CHECK-NEXT: v_readlane_b32 s39, v4, 3
-; CHECK-NEXT: v_readlane_b32 s40, v4, 4
-; CHECK-NEXT: image_sample_lz v5, v[1:2], s[44:51], s[20:23] dmask:0x1
+; CHECK-NEXT: v_readlane_b32 s37, v7, 1
+; CHECK-NEXT: v_readlane_b32 s38, v7, 2
+; CHECK-NEXT: v_readlane_b32 s39, v7, 3
+; CHECK-NEXT: v_readlane_b32 s40, v7, 4
+; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1
; CHECK-NEXT: v_mov_b32_e32 v2, 0
-; CHECK-NEXT: v_readlane_b32 s41, v4, 5
-; CHECK-NEXT: v_readlane_b32 s42, v4, 6
-; CHECK-NEXT: v_readlane_b32 s43, v4, 7
+; CHECK-NEXT: v_readlane_b32 s41, v7, 5
+; CHECK-NEXT: v_readlane_b32 s42, v7, 6
+; CHECK-NEXT: v_readlane_b32 s43, v7, 7
; CHECK-NEXT: .LBB0_2: ; %bb50
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: v_readlane_b32 s36, v4, 32
-; CHECK-NEXT: v_readlane_b32 s40, v4, 36
-; CHECK-NEXT: v_readlane_b32 s41, v4, 37
-; CHECK-NEXT: v_readlane_b32 s42, v4, 38
-; CHECK-NEXT: v_readlane_b32 s43, v4, 39
+; CHECK-NEXT: v_readlane_b32 s36, v7, 32
+; CHECK-NEXT: v_readlane_b32 s40, v7, 36
+; CHECK-NEXT: v_readlane_b32 s41, v7, 37
+; CHECK-NEXT: v_readlane_b32 s42, v7, 38
+; CHECK-NEXT: v_readlane_b32 s43, v7, 39
; CHECK-NEXT: s_mov_b32 s21, s20
; CHECK-NEXT: s_mov_b32 s22, s20
; CHECK-NEXT: s_mov_b32 s23, s20
-; CHECK-NEXT: v_readlane_b32 s37, v4, 33
-; CHECK-NEXT: v_readlane_b32 s38, v4, 34
+; CHECK-NEXT: v_readlane_b32 s37, v7, 33
+; CHECK-NEXT: v_readlane_b32 s38, v7, 34
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: image_sample_lz v6, v[1:2], s[60:67], s[40:43] dmask:0x1
-; CHECK-NEXT: v_readlane_b32 s39, v4, 35
+; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1
+; CHECK-NEXT: v_readlane_b32 s39, v7, 35
; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_sub_f32_e32 v1, v1, v6
+; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4
; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0
-; CHECK-NEXT: v_mul_f32_e32 v1, v1, v5
+; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3
; CHECK-NEXT: s_mov_b64 vcc, vcc
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
; CHECK-NEXT: .LBB0_3: ; %Flow14
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: v_readlane_b32 s12, v4, 32
-; CHECK-NEXT: v_readlane_b32 s13, v4, 33
-; CHECK-NEXT: v_readlane_b32 s14, v4, 34
-; CHECK-NEXT: v_readlane_b32 s15, v4, 35
-; CHECK-NEXT: v_readlane_b32 s16, v4, 36
-; CHECK-NEXT: v_readlane_b32 s17, v4, 37
-; CHECK-NEXT: v_readlane_b32 s18, v4, 38
-; CHECK-NEXT: v_readlane_b32 s19, v4, 39
-; CHECK-NEXT: v_writelane_b32 v4, s4, 40
-; CHECK-NEXT: v_writelane_b32 v4, s5, 41
-; CHECK-NEXT: v_writelane_b32 v4, s6, 42
-; CHECK-NEXT: v_writelane_b32 v4, s7, 43
-; CHECK-NEXT: v_writelane_b32 v4, s8, 44
-; CHECK-NEXT: v_writelane_b32 v4, s9, 45
-; CHECK-NEXT: v_writelane_b32 v4, s10, 46
-; CHECK-NEXT: v_writelane_b32 v4, s11, 47
-; CHECK-NEXT: v_writelane_b32 v4, s12, 48
-; CHECK-NEXT: v_writelane_b32 v4, s13, 49
-; CHECK-NEXT: v_writelane_b32 v4, s14, 50
-; CHECK-NEXT: v_writelane_b32 v4, s15, 51
-; CHECK-NEXT: v_writelane_b32 v4, s16, 52
-; CHECK-NEXT: v_writelane_b32 v4, s17, 53
-; CHECK-NEXT: v_writelane_b32 v4, s18, 54
-; CHECK-NEXT: v_writelane_b32 v4, s19, 55
-; CHECK-NEXT: v_writelane_b32 v4, s52, 56
-; CHECK-NEXT: v_writelane_b32 v3, s60, 0
-; CHECK-NEXT: v_writelane_b32 v4, s53, 57
-; CHECK-NEXT: v_writelane_b32 v3, s61, 1
-; CHECK-NEXT: v_writelane_b32 v4, s54, 58
-; CHECK-NEXT: v_writelane_b32 v3, s62, 2
-; CHECK-NEXT: v_writelane_b32 v4, s55, 59
-; CHECK-NEXT: v_writelane_b32 v3, s63, 3
-; CHECK-NEXT: v_writelane_b32 v4, s56, 60
-; CHECK-NEXT: v_writelane_b32 v3, s64, 4
-; CHECK-NEXT: v_writelane_b32 v4, s57, 61
-; CHECK-NEXT: v_writelane_b32 v3, s65, 5
-; CHECK-NEXT: v_writelane_b32 v4, s58, 62
-; CHECK-NEXT: v_writelane_b32 v3, s66, 6
-; CHECK-NEXT: v_writelane_b32 v4, s59, 63
-; CHECK-NEXT: v_writelane_b32 v3, s67, 7
+; CHECK-NEXT: v_readlane_b32 s12, v7, 32
+; CHECK-NEXT: v_readlane_b32 s13, v7, 33
+; CHECK-NEXT: v_readlane_b32 s14, v7, 34
+; CHECK-NEXT: v_readlane_b32 s15, v7, 35
+; CHECK-NEXT: v_readlane_b32 s16, v7, 36
+; CHECK-NEXT: v_readlane_b32 s17, v7, 37
+; CHECK-NEXT: v_readlane_b32 s18, v7, 38
+; CHECK-NEXT: v_readlane_b32 s19, v7, 39
+; CHECK-NEXT: v_writelane_b32 v7, s4, 40
+; CHECK-NEXT: v_writelane_b32 v7, s5, 41
+; CHECK-NEXT: v_writelane_b32 v7, s6, 42
+; CHECK-NEXT: v_writelane_b32 v7, s7, 43
+; CHECK-NEXT: v_writelane_b32 v7, s8, 44
+; CHECK-NEXT: v_writelane_b32 v7, s9, 45
+; CHECK-NEXT: v_writelane_b32 v7, s10, 46
+; CHECK-NEXT: v_writelane_b32 v7, s11, 47
+; CHECK-NEXT: v_writelane_b32 v7, s12, 48
+; CHECK-NEXT: v_writelane_b32 v7, s13, 49
+; CHECK-NEXT: v_writelane_b32 v7, s14, 50
+; CHECK-NEXT: v_writelane_b32 v7, s15, 51
+; CHECK-NEXT: v_writelane_b32 v7, s16, 52
+; CHECK-NEXT: v_writelane_b32 v7, s17, 53
+; CHECK-NEXT: v_writelane_b32 v7, s18, 54
+; CHECK-NEXT: v_writelane_b32 v7, s19, 55
+; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
+; CHECK-NEXT: v_writelane_b32 v7, s52, 56
+; CHECK-NEXT: v_writelane_b32 v6, s60, 0
+; CHECK-NEXT: v_writelane_b32 v7, s53, 57
+; CHECK-NEXT: v_writelane_b32 v6, s61, 1
+; CHECK-NEXT: v_writelane_b32 v7, s54, 58
+; CHECK-NEXT: v_writelane_b32 v6, s62, 2
+; CHECK-NEXT: v_writelane_b32 v7, s55, 59
+; CHECK-NEXT: v_writelane_b32 v6, s63, 3
+; CHECK-NEXT: v_writelane_b32 v7, s56, 60
+; CHECK-NEXT: v_writelane_b32 v6, s64, 4
+; CHECK-NEXT: v_writelane_b32 v7, s57, 61
+; CHECK-NEXT: v_writelane_b32 v6, s65, 5
+; CHECK-NEXT: v_writelane_b32 v7, s58, 62
+; CHECK-NEXT: v_writelane_b32 v6, s66, 6
+; CHECK-NEXT: v_writelane_b32 v7, s59, 63
+; CHECK-NEXT: v_writelane_b32 v6, s67, 7
; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27]
; CHECK-NEXT: s_cbranch_execz .LBB0_10
; CHECK-NEXT: ; %bb.4: ; %bb32
@@ -219,68 +219,68 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_mov_b32 s8, 0
; CHECK-NEXT: s_mov_b32 s9, s8
; CHECK-NEXT: v_mov_b32_e32 v0, s8
-; CHECK-NEXT: v_readlane_b32 s36, v4, 0
+; CHECK-NEXT: v_readlane_b32 s36, v7, 0
; CHECK-NEXT: v_mov_b32_e32 v1, s9
; CHECK-NEXT: s_mov_b32 s10, s8
; CHECK-NEXT: s_mov_b32 s11, s8
-; CHECK-NEXT: v_readlane_b32 s37, v4, 1
-; CHECK-NEXT: v_readlane_b32 s38, v4, 2
-; CHECK-NEXT: v_readlane_b32 s39, v4, 3
-; CHECK-NEXT: v_readlane_b32 s40, v4, 4
-; CHECK-NEXT: v_readlane_b32 s41, v4, 5
-; CHECK-NEXT: v_readlane_b32 s42, v4, 6
-; CHECK-NEXT: v_readlane_b32 s43, v4, 7
-; CHECK-NEXT: v_readlane_b32 s44, v4, 8
-; CHECK-NEXT: v_readlane_b32 s45, v4, 9
-; CHECK-NEXT: v_readlane_b32 s46, v4, 10
-; CHECK-NEXT: v_readlane_b32 s47, v4, 11
-; CHECK-NEXT: v_readlane_b32 s48, v4, 12
-; CHECK-NEXT: v_readlane_b32 s49, v4, 13
-; CHECK-NEXT: v_readlane_b32 s50, v4, 14
-; CHECK-NEXT: v_readlane_b32 s51, v4, 15
-; CHECK-NEXT: image_sample_lz v5, v[0:1], s[36:43], s[8:11] dmask:0x1
-; CHECK-NEXT: v_readlane_b32 s36, v4, 16
-; CHECK-NEXT: v_readlane_b32 s44, v4, 24
-; CHECK-NEXT: v_readlane_b32 s45, v4, 25
-; CHECK-NEXT: v_readlane_b32 s46, v4, 26
-; CHECK-NEXT: v_readlane_b32 s47, v4, 27
-; CHECK-NEXT: v_readlane_b32 s48, v4, 28
-; CHECK-NEXT: v_readlane_b32 s49, v4, 29
-; CHECK-NEXT: v_readlane_b32 s50, v4, 30
-; CHECK-NEXT: v_readlane_b32 s51, v4, 31
-; CHECK-NEXT: v_mov_b32_e32 v6, 0
-; CHECK-NEXT: v_mov_b32_e32 v7, v6
-; CHECK-NEXT: v_readlane_b32 s37, v4, 17
-; CHECK-NEXT: v_readlane_b32 s38, v4, 18
-; CHECK-NEXT: v_readlane_b32 s39, v4, 19
+; CHECK-NEXT: v_readlane_b32 s37, v7, 1
+; CHECK-NEXT: v_readlane_b32 s38, v7, 2
+; CHECK-NEXT: v_readlane_b32 s39, v7, 3
+; CHECK-NEXT: v_readlane_b32 s40, v7, 4
+; CHECK-NEXT: v_readlane_b32 s41, v7, 5
+; CHECK-NEXT: v_readlane_b32 s42, v7, 6
+; CHECK-NEXT: v_readlane_b32 s43, v7, 7
+; CHECK-NEXT: v_readlane_b32 s44, v7, 8
+; CHECK-NEXT: v_readlane_b32 s45, v7, 9
+; CHECK-NEXT: v_readlane_b32 s46, v7, 10
+; CHECK-NEXT: v_readlane_b32 s47, v7, 11
+; CHECK-NEXT: v_readlane_b32 s48, v7, 12
+; CHECK-NEXT: v_readlane_b32 s49, v7, 13
+; CHECK-NEXT: v_readlane_b32 s50, v7, 14
+; CHECK-NEXT: v_readlane_b32 s51, v7, 15
+; CHECK-NEXT: image_sample_lz v2, v[0:1], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT: v_readlane_b32 s36, v7, 16
+; CHECK-NEXT: v_readlane_b32 s44, v7, 24
+; CHECK-NEXT: v_readlane_b32 s45, v7, 25
+; CHECK-NEXT: v_readlane_b32 s46, v7, 26
+; CHECK-NEXT: v_readlane_b32 s47, v7, 27
+; CHECK-NEXT: v_readlane_b32 s48, v7, 28
+; CHECK-NEXT: v_readlane_b32 s49, v7, 29
+; CHECK-NEXT: v_readlane_b32 s50, v7, 30
+; CHECK-NEXT: v_readlane_b32 s51, v7, 31
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_readlane_b32 s37, v7, 17
+; CHECK-NEXT: v_readlane_b32 s38, v7, 18
+; CHECK-NEXT: v_readlane_b32 s39, v7, 19
; CHECK-NEXT: image_sample_lz v0, v[0:1], s[44:51], s[12:15] dmask:0x1
-; CHECK-NEXT: v_readlane_b32 s40, v4, 20
-; CHECK-NEXT: v_readlane_b32 s41, v4, 21
-; CHECK-NEXT: v_readlane_b32 s42, v4, 22
-; CHECK-NEXT: v_readlane_b32 s43, v4, 23
+; CHECK-NEXT: v_readlane_b32 s40, v7, 20
+; CHECK-NEXT: v_readlane_b32 s41, v7, 21
+; CHECK-NEXT: v_readlane_b32 s42, v7, 22
+; CHECK-NEXT: v_readlane_b32 s43, v7, 23
; CHECK-NEXT: s_waitcnt vmcnt(1)
-; CHECK-NEXT: buffer_store_dwordx3 v[5:7], off, s[8:11], 0
+; CHECK-NEXT: buffer_store_dwordx3 v[2:4], off, s[8:11], 0
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: .LBB0_6: ; %Flow12
; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23]
-; CHECK-NEXT: v_readlane_b32 s52, v4, 40
-; CHECK-NEXT: v_readlane_b32 s53, v4, 41
-; CHECK-NEXT: v_readlane_b32 s54, v4, 42
-; CHECK-NEXT: v_readlane_b32 s55, v4, 43
-; CHECK-NEXT: v_readlane_b32 s56, v4, 44
-; CHECK-NEXT: v_readlane_b32 s57, v4, 45
-; CHECK-NEXT: v_readlane_b32 s58, v4, 46
-; CHECK-NEXT: v_readlane_b32 s59, v4, 47
-; CHECK-NEXT: v_readlane_b32 s60, v4, 48
-; CHECK-NEXT: v_readlane_b32 s61, v4, 49
-; CHECK-NEXT: v_readlane_b32 s62, v4, 50
-; CHECK-NEXT: v_readlane_b32 s63, v4, 51
-; CHECK-NEXT: v_readlane_b32 s64, v4, 52
-; CHECK-NEXT: v_readlane_b32 s65, v4, 53
-; CHECK-NEXT: v_readlane_b32 s66, v4, 54
-; CHECK-NEXT: v_readlane_b32 s67, v4, 55
+; CHECK-NEXT: v_readlane_b32 s52, v7, 40
+; CHECK-NEXT: v_readlane_b32 s53, v7, 41
+; CHECK-NEXT: v_readlane_b32 s54, v7, 42
+; CHECK-NEXT: v_readlane_b32 s55, v7, 43
+; CHECK-NEXT: v_readlane_b32 s56, v7, 44
+; CHECK-NEXT: v_readlane_b32 s57, v7, 45
+; CHECK-NEXT: v_readlane_b32 s58, v7, 46
+; CHECK-NEXT: v_readlane_b32 s59, v7, 47
+; CHECK-NEXT: v_readlane_b32 s60, v7, 48
+; CHECK-NEXT: v_readlane_b32 s61, v7, 49
+; CHECK-NEXT: v_readlane_b32 s62, v7, 50
+; CHECK-NEXT: v_readlane_b32 s63, v7, 51
+; CHECK-NEXT: v_readlane_b32 s64, v7, 52
+; CHECK-NEXT: v_readlane_b32 s65, v7, 53
+; CHECK-NEXT: v_readlane_b32 s66, v7, 54
+; CHECK-NEXT: v_readlane_b32 s67, v7, 55
; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_9
; CHECK-NEXT: ; %bb.7: ; %bb33.preheader
@@ -288,32 +288,32 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_mov_b32 s6, s8
; CHECK-NEXT: s_mov_b32 s7, s8
; CHECK-NEXT: v_mov_b32_e32 v1, s6
-; CHECK-NEXT: v_readlane_b32 s36, v4, 56
+; CHECK-NEXT: v_readlane_b32 s36, v7, 56
; CHECK-NEXT: s_mov_b32 s9, s8
; CHECK-NEXT: s_mov_b32 s10, s8
; CHECK-NEXT: s_mov_b32 s11, s8
; CHECK-NEXT: v_mov_b32_e32 v2, s7
-; CHECK-NEXT: v_readlane_b32 s37, v4, 57
-; CHECK-NEXT: v_readlane_b32 s38, v4, 58
-; CHECK-NEXT: v_readlane_b32 s39, v4, 59
-; CHECK-NEXT: v_readlane_b32 s40, v4, 60
-; CHECK-NEXT: v_readlane_b32 s41, v4, 61
-; CHECK-NEXT: v_readlane_b32 s42, v4, 62
-; CHECK-NEXT: v_readlane_b32 s43, v4, 63
+; CHECK-NEXT: v_readlane_b32 s37, v7, 57
+; CHECK-NEXT: v_readlane_b32 s38, v7, 58
+; CHECK-NEXT: v_readlane_b32 s39, v7, 59
+; CHECK-NEXT: v_readlane_b32 s40, v7, 60
+; CHECK-NEXT: v_readlane_b32 s41, v7, 61
+; CHECK-NEXT: v_readlane_b32 s42, v7, 62
+; CHECK-NEXT: v_readlane_b32 s43, v7, 63
; CHECK-NEXT: s_nop 4
-; CHECK-NEXT: image_sample_lz v5, v[1:2], s[36:43], s[8:11] dmask:0x1
-; CHECK-NEXT: image_sample_lz v6, v[1:2], s[52:59], s[8:11] dmask:0x1
+; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1
+; CHECK-NEXT: image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1
; CHECK-NEXT: ; kill: killed $vgpr1_vgpr2
; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37]
; CHECK-NEXT: s_and_b64 vcc, exec, 0
-; CHECK-NEXT: v_readlane_b32 s44, v3, 0
-; CHECK-NEXT: v_readlane_b32 s45, v3, 1
-; CHECK-NEXT: v_readlane_b32 s46, v3, 2
-; CHECK-NEXT: v_readlane_b32 s47, v3, 3
-; CHECK-NEXT: v_readlane_b32 s48, v3, 4
-; CHECK-NEXT: v_readlane_b32 s49, v3, 5
-; CHECK-NEXT: v_readlane_b32 s50, v3, 6
-; CHECK-NEXT: v_readlane_b32 s51, v3, 7
+; CHECK-NEXT: v_readlane_b32 s44, v6, 0
+; CHECK-NEXT: v_readlane_b32 s45, v6, 1
+; CHECK-NEXT: v_readlane_b32 s46, v6, 2
+; CHECK-NEXT: v_readlane_b32 s47, v6, 3
+; CHECK-NEXT: v_readlane_b32 s48, v6, 4
+; CHECK-NEXT: v_readlane_b32 s49, v6, 5
+; CHECK-NEXT: v_readlane_b32 s50, v6, 6
+; CHECK-NEXT: v_readlane_b32 s51, v6, 7
; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39]
; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41]
; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43]
@@ -321,7 +321,7 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59
; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_sub_f32_e32 v1, v6, v5
+; CHECK-NEXT: v_sub_f32_e32 v1, v4, v3
; CHECK-NEXT: v_mul_f32_e32 v0, v1, v0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: .LBB0_8: ; %bb33
@@ -334,46 +334,44 @@ define void @main(i1 %arg) #0 {
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock
; CHECK-NEXT: s_or_b64 exec, exec, s[20:21]
-; CHECK-NEXT: v_readlane_b32 s67, v8, 33
-; CHECK-NEXT: v_readlane_b32 s66, v8, 32
-; CHECK-NEXT: v_readlane_b32 s65, v8, 31
-; CHECK-NEXT: v_readlane_b32 s64, v8, 30
-; CHECK-NEXT: v_readlane_b32 s63, v8, 29
-; CHECK-NEXT: v_readlane_b32 s62, v8, 28
-; CHECK-NEXT: v_readlane_b32 s61, v8, 27
-; CHECK-NEXT: v_readlane_b32 s60, v8, 26
-; CHECK-NEXT: v_readlane_b32 s59, v8, 25
-; CHECK-NEXT: v_readlane_b32 s58, v8, 24
-; CHECK-NEXT: v_readlane_b32 s57, v8, 23
-; CHECK-NEXT: v_readlane_b32 s56, v8, 22
-; CHECK-NEXT: v_readlane_b32 s55, v8, 21
-; CHECK-NEXT: v_readlane_b32 s54, v8, 20
-; CHECK-NEXT: v_readlane_b32 s53, v8, 19
-; CHECK-NEXT: v_readlane_b32 s52, v8, 18
-; CHECK-NEXT: v_readlane_b32 s51, v8, 17
-; CHECK-NEXT: v_readlane_b32 s50, v8, 16
-; CHECK-NEXT: v_readlane_b32 s49, v8, 15
-; CHECK-NEXT: v_readlane_b32 s48, v8, 14
-; CHECK-NEXT: v_readlane_b32 s47, v8, 13
-; CHECK-NEXT: v_readlane_b32 s46, v8, 12
-; CHECK-NEXT: v_readlane_b32 s45, v8, 11
-; CHECK-NEXT: v_readlane_b32 s44, v8, 10
-; CHECK-NEXT: v_readlane_b32 s43, v8, 9
-; CHECK-NEXT: v_readlane_b32 s42, v8, 8
-; CHECK-NEXT: v_readlane_b32 s41, v8, 7
-; CHECK-NEXT: v_readlane_b32 s40, v8, 6
-; CHECK-NEXT: v_readlane_b32 s39, v8, 5
-; CHECK-NEXT: v_readlane_b32 s38, v8, 4
-; CHECK-NEXT: v_readlane_b32 s37, v8, 3
-; CHECK-NEXT: v_readlane_b32 s36, v8, 2
-; CHECK-NEXT: v_readlane_b32 s31, v8, 1
-; CHECK-NEXT: v_readlane_b32 s30, v8, 0
-; CHECK-NEXT: ; kill: killed $vgpr4
-; CHECK-NEXT: ; kill: killed $vgpr3
+; CHECK-NEXT: v_readlane_b32 s67, v5, 33
+; CHECK-NEXT: v_readlane_b32 s66, v5, 32
+; CHECK-NEXT: v_readlane_b32 s65, v5, 31
+; CHECK-NEXT: v_readlane_b32 s64, v5, 30
+; CHECK-NEXT: v_readlane_b32 s63, v5, 29
+; CHECK-NEXT: v_readlane_b32 s62, v5, 28
+; CHECK-NEXT: v_readlane_b32 s61, v5, 27
+; CHECK-NEXT: v_readlane_b32 s60, v5, 26
+; CHECK-NEXT: v_readlane_b32 s59, v5, 25
+; CHECK-NEXT: v_readlane_b32 s58, v5, 24
+; CHECK-NEXT: v_readlane_b32 s57, v5, 23
+; CHECK-NEXT: v_readlane_b32 s56, v5, 22
+; CHECK-NEXT: v_readlane_b32 s55, v5, 21
+; CHECK-NEXT: v_readlane_b32 s54, v5, 20
+; CHECK-NEXT: v_readlane_b32 s53, v5, 19
+; CHECK-NEXT: v_readlane_b32 s52, v5, 18
+; CHECK-NEXT: v_readlane_b32 s51, v5, 17
+; CHECK-NEXT: v_readlane_b32 s50, v5, 16
+; CHECK-NEXT: v_readlane_b32 s49, v5, 15
+; CHECK-NEXT: v_readlane_b32 s48, v5, 14
+; CHECK-NEXT: v_readlane_b32 s47, v5, 13
+; CHECK-NEXT: v_readlane_b32 s46, v5, 12
+; CHECK-NEXT: v_readlane_b32 s45, v5, 11
+; CHECK-NEXT: v_readlane_b32 s44, v5, 10
+; CHECK-NEXT: v_readlane_b32 s43, v5, 9
+; CHECK-NEXT: v_readlane_b32 s42, v5, 8
+; CHECK-NEXT: v_readlane_b32 s41, v5, 7
+; CHECK-NEXT: v_readlane_b32 s40, v5, 6
+; CHECK-NEXT: v_readlane_b32 s39, v5, 5
+; CHECK-NEXT: v_readlane_b32 s38, v5, 4
+; CHECK-NEXT: v_readlane_b32 s37, v5, 3
+; CHECK-NEXT: v_readlane_b32 s36, v5, 2
+; CHECK-NEXT: v_readlane_b32 s31, v5, 1
+; CHECK-NEXT: v_readlane_b32 s30, v5, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
index 96fb7cfeb2775e..40089ed82b5db0 100644
--- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: not llc -mtriple=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - 2>%t.err %s | FileCheck %s
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -o - 2>%t.err %s | FileCheck %s
# RUN: FileCheck -check-prefix=ERR %s < %t.err
# This testcase cannot be compiled. An attempted eviction legality
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 60946956547a7c..f1f4abe580c002 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -1510,12 +1510,7 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) {
; NOOPT-NEXT: s_mov_b32 s23, 0xe8f000
; NOOPT-NEXT: s_add_u32 s20, s20, s9
; NOOPT-NEXT: s_addc_u32 s21, s21, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; NOOPT-NEXT: v_mov_b32_e32 v1, v0
-; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill
; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; NOOPT-NEXT: s_waitcnt lgkmcnt(0)
; NOOPT-NEXT: s_mov_b32 s6, s1
@@ -1526,11 +1521,11 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) {
; NOOPT-NEXT: s_mov_b32 s1, s6
; NOOPT-NEXT: s_mov_b32 s2, s5
; NOOPT-NEXT: s_mov_b32 s3, s4
-; NOOPT-NEXT: s_waitcnt vmcnt(1)
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 1
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 2
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 3
+; NOOPT-NEXT: ; implicit-def: $vgpr31 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v31, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v31, s1, 1
+; NOOPT-NEXT: v_writelane_b32 v31, s2, 2
+; NOOPT-NEXT: v_writelane_b32 v31, s3, 3
; NOOPT-NEXT: s_mov_b32 s0, 16
; NOOPT-NEXT: s_mov_b32 s1, 15
; NOOPT-NEXT: s_mov_b32 s2, 14
@@ -1548,126 +1543,130 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) {
; NOOPT-NEXT: s_mov_b32 s14, 1
; NOOPT-NEXT: s_mov_b32 s15, 0
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v1, s15
-; NOOPT-NEXT: v_mov_b32_e32 v31, s14
-; NOOPT-NEXT: v_mov_b32_e32 v30, s13
-; NOOPT-NEXT: v_mov_b32_e32 v29, s12
-; NOOPT-NEXT: v_mov_b32_e32 v28, s11
-; NOOPT-NEXT: v_mov_b32_e32 v27, s10
-; NOOPT-NEXT: v_mov_b32_e32 v26, s9
-; NOOPT-NEXT: v_mov_b32_e32 v25, s8
-; NOOPT-NEXT: v_mov_b32_e32 v24, s7
-; NOOPT-NEXT: v_mov_b32_e32 v23, s6
-; NOOPT-NEXT: v_mov_b32_e32 v22, s5
-; NOOPT-NEXT: v_mov_b32_e32 v21, s4
-; NOOPT-NEXT: v_mov_b32_e32 v20, s3
-; NOOPT-NEXT: v_mov_b32_e32 v19, s2
-; NOOPT-NEXT: v_mov_b32_e32 v18, s1
-; NOOPT-NEXT: v_mov_b32_e32 v17, s0
-; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v2, v31
-; NOOPT-NEXT: v_mov_b32_e32 v3, v30
-; NOOPT-NEXT: v_mov_b32_e32 v4, v29
-; NOOPT-NEXT: v_mov_b32_e32 v5, v28
-; NOOPT-NEXT: v_mov_b32_e32 v6, v27
-; NOOPT-NEXT: v_mov_b32_e32 v7, v26
-; NOOPT-NEXT: v_mov_b32_e32 v8, v25
-; NOOPT-NEXT: v_mov_b32_e32 v9, v24
-; NOOPT-NEXT: v_mov_b32_e32 v10, v23
-; NOOPT-NEXT: v_mov_b32_e32 v11, v22
-; NOOPT-NEXT: v_mov_b32_e32 v12, v21
-; NOOPT-NEXT: v_mov_b32_e32 v13, v20
-; NOOPT-NEXT: v_mov_b32_e32 v14, v19
-; NOOPT-NEXT: v_mov_b32_e32 v15, v18
-; NOOPT-NEXT: v_mov_b32_e32 v16, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_mov_b32_e32 v0, s15
+; NOOPT-NEXT: v_mov_b32_e32 v30, s14
+; NOOPT-NEXT: v_mov_b32_e32 v29, s13
+; NOOPT-NEXT: v_mov_b32_e32 v28, s12
+; NOOPT-NEXT: v_mov_b32_e32 v27, s11
+; NOOPT-NEXT: v_mov_b32_e32 v26, s10
+; NOOPT-NEXT: v_mov_b32_e32 v25, s9
+; NOOPT-NEXT: v_mov_b32_e32 v24, s8
+; NOOPT-NEXT: v_mov_b32_e32 v23, s7
+; NOOPT-NEXT: v_mov_b32_e32 v22, s6
+; NOOPT-NEXT: v_mov_b32_e32 v21, s5
+; NOOPT-NEXT: v_mov_b32_e32 v20, s4
+; NOOPT-NEXT: v_mov_b32_e32 v19, s3
+; NOOPT-NEXT: v_mov_b32_e32 v18, s2
+; NOOPT-NEXT: v_mov_b32_e32 v17, s1
+; NOOPT-NEXT: v_mov_b32_e32 v16, s0
+; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v1, v30
+; NOOPT-NEXT: v_mov_b32_e32 v2, v29
+; NOOPT-NEXT: v_mov_b32_e32 v3, v28
+; NOOPT-NEXT: v_mov_b32_e32 v4, v27
+; NOOPT-NEXT: v_mov_b32_e32 v5, v26
+; NOOPT-NEXT: v_mov_b32_e32 v6, v25
+; NOOPT-NEXT: v_mov_b32_e32 v7, v24
+; NOOPT-NEXT: v_mov_b32_e32 v8, v23
+; NOOPT-NEXT: v_mov_b32_e32 v9, v22
+; NOOPT-NEXT: v_mov_b32_e32 v10, v21
+; NOOPT-NEXT: v_mov_b32_e32 v11, v20
+; NOOPT-NEXT: v_mov_b32_e32 v12, v19
+; NOOPT-NEXT: v_mov_b32_e32 v13, v18
+; NOOPT-NEXT: v_mov_b32_e32 v14, v17
+; NOOPT-NEXT: v_mov_b32_e32 v15, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 4
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 5
+; NOOPT-NEXT: v_writelane_b32 v31, s0, 4
+; NOOPT-NEXT: v_writelane_b32 v31, s1, 5
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: ; implicit-def: $vgpr0
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1
+; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:72 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(1)
+; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(6)
+; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(5)
+; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(4)
+; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(3)
+; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(2)
+; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(1)
+; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 6
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 7
-; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:72 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 6
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 7
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v16
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_add_i32 m0, s2, 0xfffffe00
-; NOOPT-NEXT: v_movrels_b32_e32 v1, v1
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movrels_b32_e32 v0, v0
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 6
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 7
+; NOOPT-NEXT: v_writelane_b32 v31, s2, 6
+; NOOPT-NEXT: v_writelane_b32 v31, s3, 7
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB5_1
; NOOPT-NEXT: ; %bb.2:
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 5
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 4
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 5
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.3:
+; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:76 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 1
-; NOOPT-NEXT: v_readlane_b32 s2, v0, 2
-; NOOPT-NEXT: v_readlane_b32 s3, v0, 3
-; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: buffer_store_dword v1, off, s[0:3], 0
-; NOOPT-NEXT: ; kill: killed $vgpr0
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 0
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 1
+; NOOPT-NEXT: v_readlane_b32 s2, v31, 2
+; NOOPT-NEXT: v_readlane_b32 s3, v31, 3
+; NOOPT-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: extract_neg_offset_vgpr:
@@ -4022,7 +4021,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr
; NOOPT-NEXT: s_mov_b32 s23, 0xe8f000
; NOOPT-NEXT: s_add_u32 s20, s20, s9
; NOOPT-NEXT: s_addc_u32 s21, s21, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane
; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:136 ; 4-byte Folded Spill
; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
; NOOPT-NEXT: s_waitcnt lgkmcnt(0)
@@ -4034,10 +4032,11 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr
; NOOPT-NEXT: s_mov_b32 s1, s6
; NOOPT-NEXT: s_mov_b32 s2, s5
; NOOPT-NEXT: s_mov_b32 s3, s4
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 1
-; NOOPT-NEXT: v_writelane_b32 v16, s2, 2
-; NOOPT-NEXT: v_writelane_b32 v16, s3, 3
+; NOOPT-NEXT: ; implicit-def: $vgpr31 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v31, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v31, s1, 1
+; NOOPT-NEXT: v_writelane_b32 v31, s2, 2
+; NOOPT-NEXT: v_writelane_b32 v31, s3, 3
; NOOPT-NEXT: s_mov_b32 s0, 16
; NOOPT-NEXT: s_mov_b32 s1, 15
; NOOPT-NEXT: s_mov_b32 s2, 14
@@ -4056,37 +4055,37 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr
; NOOPT-NEXT: s_mov_b32 s15, 1
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: v_mov_b32_e32 v0, s15
-; NOOPT-NEXT: v_mov_b32_e32 v31, s14
-; NOOPT-NEXT: v_mov_b32_e32 v30, s13
-; NOOPT-NEXT: v_mov_b32_e32 v29, s12
-; NOOPT-NEXT: v_mov_b32_e32 v28, s11
-; NOOPT-NEXT: v_mov_b32_e32 v27, s10
-; NOOPT-NEXT: v_mov_b32_e32 v26, s9
-; NOOPT-NEXT: v_mov_b32_e32 v25, s8
-; NOOPT-NEXT: v_mov_b32_e32 v24, s7
-; NOOPT-NEXT: v_mov_b32_e32 v23, s6
-; NOOPT-NEXT: v_mov_b32_e32 v22, s5
-; NOOPT-NEXT: v_mov_b32_e32 v21, s4
-; NOOPT-NEXT: v_mov_b32_e32 v20, s3
-; NOOPT-NEXT: v_mov_b32_e32 v19, s2
-; NOOPT-NEXT: v_mov_b32_e32 v18, s1
-; NOOPT-NEXT: v_mov_b32_e32 v17, s0
+; NOOPT-NEXT: v_mov_b32_e32 v30, s14
+; NOOPT-NEXT: v_mov_b32_e32 v29, s13
+; NOOPT-NEXT: v_mov_b32_e32 v28, s12
+; NOOPT-NEXT: v_mov_b32_e32 v27, s11
+; NOOPT-NEXT: v_mov_b32_e32 v26, s10
+; NOOPT-NEXT: v_mov_b32_e32 v25, s9
+; NOOPT-NEXT: v_mov_b32_e32 v24, s8
+; NOOPT-NEXT: v_mov_b32_e32 v23, s7
+; NOOPT-NEXT: v_mov_b32_e32 v22, s6
+; NOOPT-NEXT: v_mov_b32_e32 v21, s5
+; NOOPT-NEXT: v_mov_b32_e32 v20, s4
+; NOOPT-NEXT: v_mov_b32_e32 v19, s3
+; NOOPT-NEXT: v_mov_b32_e32 v18, s2
+; NOOPT-NEXT: v_mov_b32_e32 v17, s1
+; NOOPT-NEXT: v_mov_b32_e32 v16, s0
; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v1, v31
-; NOOPT-NEXT: v_mov_b32_e32 v2, v30
-; NOOPT-NEXT: v_mov_b32_e32 v3, v29
-; NOOPT-NEXT: v_mov_b32_e32 v4, v28
-; NOOPT-NEXT: v_mov_b32_e32 v5, v27
-; NOOPT-NEXT: v_mov_b32_e32 v6, v26
-; NOOPT-NEXT: v_mov_b32_e32 v7, v25
-; NOOPT-NEXT: v_mov_b32_e32 v8, v24
-; NOOPT-NEXT: v_mov_b32_e32 v9, v23
-; NOOPT-NEXT: v_mov_b32_e32 v10, v22
-; NOOPT-NEXT: v_mov_b32_e32 v11, v21
-; NOOPT-NEXT: v_mov_b32_e32 v12, v20
-; NOOPT-NEXT: v_mov_b32_e32 v13, v19
-; NOOPT-NEXT: v_mov_b32_e32 v14, v18
-; NOOPT-NEXT: v_mov_b32_e32 v15, v17
+; NOOPT-NEXT: v_mov_b32_e32 v1, v30
+; NOOPT-NEXT: v_mov_b32_e32 v2, v29
+; NOOPT-NEXT: v_mov_b32_e32 v3, v28
+; NOOPT-NEXT: v_mov_b32_e32 v4, v27
+; NOOPT-NEXT: v_mov_b32_e32 v5, v26
+; NOOPT-NEXT: v_mov_b32_e32 v6, v25
+; NOOPT-NEXT: v_mov_b32_e32 v7, v24
+; NOOPT-NEXT: v_mov_b32_e32 v8, v23
+; NOOPT-NEXT: v_mov_b32_e32 v9, v22
+; NOOPT-NEXT: v_mov_b32_e32 v10, v21
+; NOOPT-NEXT: v_mov_b32_e32 v11, v20
+; NOOPT-NEXT: v_mov_b32_e32 v12, v19
+; NOOPT-NEXT: v_mov_b32_e32 v13, v18
+; NOOPT-NEXT: v_mov_b32_e32 v14, v17
+; NOOPT-NEXT: v_mov_b32_e32 v15, v16
; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:80 ; 4-byte Folded Spill
@@ -4103,202 +4102,195 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr
; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:124 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:132 ; 4-byte Folded Spill
-; NOOPT-NEXT: v_mov_b32_e32 v17, 33
-; NOOPT-NEXT: buffer_store_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_mov_b32_e32 v16, 33
+; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 4
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 5
+; NOOPT-NEXT: v_writelane_b32 v31, s0, 4
+; NOOPT-NEXT: v_writelane_b32 v31, s1, 5
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
-; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB14_1: ; =>This Inner Loop Header: Depth=1
-; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 6
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 7
-; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(6)
-; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(5)
-; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(4)
-; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(3)
-; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(2)
-; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v18
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 6
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 7
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_add_i32 m0, s2, 0xfffffe00
-; NOOPT-NEXT: v_movreld_b32_e32 v1, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movreld_b32_e32 v0, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 6
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 7
+; NOOPT-NEXT: v_writelane_b32 v31, s2, 6
+; NOOPT-NEXT: v_writelane_b32 v31, s3, 7
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB14_1
; NOOPT-NEXT: ; %bb.2:
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 5
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 4
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 5
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.3:
+; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 1
-; NOOPT-NEXT: v_readlane_b32 s2, v0, 2
-; NOOPT-NEXT: v_readlane_b32 s3, v0, 3
-; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(12)
-; NOOPT-NEXT: v_mov_b32_e32 v5, v19
-; NOOPT-NEXT: v_mov_b32_e32 v6, v18
-; NOOPT-NEXT: v_mov_b32_e32 v7, v17
-; NOOPT-NEXT: v_mov_b32_e32 v1, v16
-; NOOPT-NEXT: s_waitcnt vmcnt(8)
-; NOOPT-NEXT: v_mov_b32_e32 v2, v23
-; NOOPT-NEXT: v_mov_b32_e32 v3, v22
-; NOOPT-NEXT: v_mov_b32_e32 v4, v21
-; NOOPT-NEXT: v_mov_b32_e32 v8, v20
-; NOOPT-NEXT: s_waitcnt vmcnt(4)
-; NOOPT-NEXT: v_mov_b32_e32 v13, v27
-; NOOPT-NEXT: v_mov_b32_e32 v14, v26
-; NOOPT-NEXT: v_mov_b32_e32 v15, v25
-; NOOPT-NEXT: v_mov_b32_e32 v9, v24
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v10, v31
-; NOOPT-NEXT: v_mov_b32_e32 v11, v30
-; NOOPT-NEXT: v_mov_b32_e32 v12, v29
-; NOOPT-NEXT: v_mov_b32_e32 v16, v28
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 0
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 1
+; NOOPT-NEXT: v_readlane_b32 s2, v31, 2
+; NOOPT-NEXT: v_readlane_b32 s3, v31, 3
+; NOOPT-NEXT: v_mov_b32_e32 v4, v18
+; NOOPT-NEXT: v_mov_b32_e32 v5, v17
+; NOOPT-NEXT: v_mov_b32_e32 v6, v16
+; NOOPT-NEXT: v_mov_b32_e32 v0, v15
+; NOOPT-NEXT: v_mov_b32_e32 v1, v22
+; NOOPT-NEXT: v_mov_b32_e32 v2, v21
+; NOOPT-NEXT: v_mov_b32_e32 v3, v20
+; NOOPT-NEXT: v_mov_b32_e32 v7, v19
+; NOOPT-NEXT: v_mov_b32_e32 v12, v26
+; NOOPT-NEXT: v_mov_b32_e32 v13, v25
+; NOOPT-NEXT: v_mov_b32_e32 v14, v24
+; NOOPT-NEXT: v_mov_b32_e32 v8, v23
+; NOOPT-NEXT: v_mov_b32_e32 v9, v30
+; NOOPT-NEXT: v_mov_b32_e32 v10, v29
+; NOOPT-NEXT: v_mov_b32_e32 v11, v28
+; NOOPT-NEXT: v_mov_b32_e32 v15, v27
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17_vgpr18_vgpr19 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v17, v12
-; NOOPT-NEXT: v_mov_b32_e32 v18, v11
-; NOOPT-NEXT: v_mov_b32_e32 v19, v10
-; NOOPT-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
+; NOOPT-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16_vgpr17_vgpr18 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v16, v11
+; NOOPT-NEXT: v_mov_b32_e32 v17, v10
+; NOOPT-NEXT: v_mov_b32_e32 v18, v9
+; NOOPT-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v10, v15
-; NOOPT-NEXT: v_mov_b32_e32 v11, v14
-; NOOPT-NEXT: v_mov_b32_e32 v12, v13
-; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:32
+; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v9, v14
+; NOOPT-NEXT: v_mov_b32_e32 v10, v13
+; NOOPT-NEXT: v_mov_b32_e32 v11, v12
+; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
+; NOOPT-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10 killed $exec
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v9, v4
-; NOOPT-NEXT: v_mov_b32_e32 v10, v3
-; NOOPT-NEXT: v_mov_b32_e32 v11, v2
-; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
+; NOOPT-NEXT: v_mov_b32_e32 v8, v3
+; NOOPT-NEXT: v_mov_b32_e32 v9, v2
+; NOOPT-NEXT: v_mov_b32_e32 v10, v1
+; NOOPT-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:16
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v2, v7
-; NOOPT-NEXT: v_mov_b32_e32 v3, v6
-; NOOPT-NEXT: v_mov_b32_e32 v4, v5
-; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0
-; NOOPT-NEXT: ; kill: killed $vgpr0
+; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v1, v6
+; NOOPT-NEXT: v_mov_b32_e32 v2, v5
+; NOOPT-NEXT: v_mov_b32_e32 v3, v4
+; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: insert_neg_offset_vgpr:
@@ -4512,7 +4504,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p
; NOOPT-NEXT: s_mov_b32 s23, 0xe8f000
; NOOPT-NEXT: s_add_u32 s20, s20, s9
; NOOPT-NEXT: s_addc_u32 s21, s21, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane
; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:136 ; 4-byte Folded Spill
; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb
; NOOPT-NEXT: s_waitcnt lgkmcnt(0)
@@ -4524,10 +4515,11 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p
; NOOPT-NEXT: s_mov_b32 s1, s6
; NOOPT-NEXT: s_mov_b32 s2, s5
; NOOPT-NEXT: s_mov_b32 s3, s4
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 1
-; NOOPT-NEXT: v_writelane_b32 v16, s2, 2
-; NOOPT-NEXT: v_writelane_b32 v16, s3, 3
+; NOOPT-NEXT: ; implicit-def: $vgpr31 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v31, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v31, s1, 1
+; NOOPT-NEXT: v_writelane_b32 v31, s2, 2
+; NOOPT-NEXT: v_writelane_b32 v31, s3, 3
; NOOPT-NEXT: s_mov_b32 s0, 16
; NOOPT-NEXT: s_mov_b32 s1, 15
; NOOPT-NEXT: s_mov_b32 s2, 14
@@ -4546,37 +4538,37 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p
; NOOPT-NEXT: s_mov_b32 s15, 1
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: v_mov_b32_e32 v0, s15
-; NOOPT-NEXT: v_mov_b32_e32 v31, s14
-; NOOPT-NEXT: v_mov_b32_e32 v30, s13
-; NOOPT-NEXT: v_mov_b32_e32 v29, s12
-; NOOPT-NEXT: v_mov_b32_e32 v28, s11
-; NOOPT-NEXT: v_mov_b32_e32 v27, s10
-; NOOPT-NEXT: v_mov_b32_e32 v26, s9
-; NOOPT-NEXT: v_mov_b32_e32 v25, s8
-; NOOPT-NEXT: v_mov_b32_e32 v24, s7
-; NOOPT-NEXT: v_mov_b32_e32 v23, s6
-; NOOPT-NEXT: v_mov_b32_e32 v22, s5
-; NOOPT-NEXT: v_mov_b32_e32 v21, s4
-; NOOPT-NEXT: v_mov_b32_e32 v20, s3
-; NOOPT-NEXT: v_mov_b32_e32 v19, s2
-; NOOPT-NEXT: v_mov_b32_e32 v18, s1
-; NOOPT-NEXT: v_mov_b32_e32 v17, s0
+; NOOPT-NEXT: v_mov_b32_e32 v30, s14
+; NOOPT-NEXT: v_mov_b32_e32 v29, s13
+; NOOPT-NEXT: v_mov_b32_e32 v28, s12
+; NOOPT-NEXT: v_mov_b32_e32 v27, s11
+; NOOPT-NEXT: v_mov_b32_e32 v26, s10
+; NOOPT-NEXT: v_mov_b32_e32 v25, s9
+; NOOPT-NEXT: v_mov_b32_e32 v24, s8
+; NOOPT-NEXT: v_mov_b32_e32 v23, s7
+; NOOPT-NEXT: v_mov_b32_e32 v22, s6
+; NOOPT-NEXT: v_mov_b32_e32 v21, s5
+; NOOPT-NEXT: v_mov_b32_e32 v20, s4
+; NOOPT-NEXT: v_mov_b32_e32 v19, s3
+; NOOPT-NEXT: v_mov_b32_e32 v18, s2
+; NOOPT-NEXT: v_mov_b32_e32 v17, s1
+; NOOPT-NEXT: v_mov_b32_e32 v16, s0
; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v1, v31
-; NOOPT-NEXT: v_mov_b32_e32 v2, v30
-; NOOPT-NEXT: v_mov_b32_e32 v3, v29
-; NOOPT-NEXT: v_mov_b32_e32 v4, v28
-; NOOPT-NEXT: v_mov_b32_e32 v5, v27
-; NOOPT-NEXT: v_mov_b32_e32 v6, v26
-; NOOPT-NEXT: v_mov_b32_e32 v7, v25
-; NOOPT-NEXT: v_mov_b32_e32 v8, v24
-; NOOPT-NEXT: v_mov_b32_e32 v9, v23
-; NOOPT-NEXT: v_mov_b32_e32 v10, v22
-; NOOPT-NEXT: v_mov_b32_e32 v11, v21
-; NOOPT-NEXT: v_mov_b32_e32 v12, v20
-; NOOPT-NEXT: v_mov_b32_e32 v13, v19
-; NOOPT-NEXT: v_mov_b32_e32 v14, v18
-; NOOPT-NEXT: v_mov_b32_e32 v15, v17
+; NOOPT-NEXT: v_mov_b32_e32 v1, v30
+; NOOPT-NEXT: v_mov_b32_e32 v2, v29
+; NOOPT-NEXT: v_mov_b32_e32 v3, v28
+; NOOPT-NEXT: v_mov_b32_e32 v4, v27
+; NOOPT-NEXT: v_mov_b32_e32 v5, v26
+; NOOPT-NEXT: v_mov_b32_e32 v6, v25
+; NOOPT-NEXT: v_mov_b32_e32 v7, v24
+; NOOPT-NEXT: v_mov_b32_e32 v8, v23
+; NOOPT-NEXT: v_mov_b32_e32 v9, v22
+; NOOPT-NEXT: v_mov_b32_e32 v10, v21
+; NOOPT-NEXT: v_mov_b32_e32 v11, v20
+; NOOPT-NEXT: v_mov_b32_e32 v12, v19
+; NOOPT-NEXT: v_mov_b32_e32 v13, v18
+; NOOPT-NEXT: v_mov_b32_e32 v14, v17
+; NOOPT-NEXT: v_mov_b32_e32 v15, v16
; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:80 ; 4-byte Folded Spill
@@ -4593,202 +4585,195 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p
; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:124 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:132 ; 4-byte Folded Spill
-; NOOPT-NEXT: v_mov_b32_e32 v17, 0x1f4
-; NOOPT-NEXT: buffer_store_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_mov_b32_e32 v16, 0x1f4
+; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 4
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 5
+; NOOPT-NEXT: v_writelane_b32 v31, s0, 4
+; NOOPT-NEXT: v_writelane_b32 v31, s1, 5
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
-; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1
-; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 6
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 7
-; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(6)
-; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(5)
-; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(4)
-; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(3)
-; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(2)
-; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v18
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 6
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 7
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_add_i32 m0, s2, -16
-; NOOPT-NEXT: v_movreld_b32_e32 v1, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movreld_b32_e32 v0, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 6
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 7
+; NOOPT-NEXT: v_writelane_b32 v31, s2, 6
+; NOOPT-NEXT: v_writelane_b32 v31, s3, 7
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB15_1
; NOOPT-NEXT: ; %bb.2:
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 5
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 4
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 5
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.3:
+; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[16:17]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 1
-; NOOPT-NEXT: v_readlane_b32 s2, v0, 2
-; NOOPT-NEXT: v_readlane_b32 s3, v0, 3
-; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(12)
-; NOOPT-NEXT: v_mov_b32_e32 v5, v19
-; NOOPT-NEXT: v_mov_b32_e32 v6, v18
-; NOOPT-NEXT: v_mov_b32_e32 v7, v17
-; NOOPT-NEXT: v_mov_b32_e32 v1, v16
-; NOOPT-NEXT: s_waitcnt vmcnt(8)
-; NOOPT-NEXT: v_mov_b32_e32 v2, v23
-; NOOPT-NEXT: v_mov_b32_e32 v3, v22
-; NOOPT-NEXT: v_mov_b32_e32 v4, v21
-; NOOPT-NEXT: v_mov_b32_e32 v8, v20
-; NOOPT-NEXT: s_waitcnt vmcnt(4)
-; NOOPT-NEXT: v_mov_b32_e32 v13, v27
-; NOOPT-NEXT: v_mov_b32_e32 v14, v26
-; NOOPT-NEXT: v_mov_b32_e32 v15, v25
-; NOOPT-NEXT: v_mov_b32_e32 v9, v24
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v10, v31
-; NOOPT-NEXT: v_mov_b32_e32 v11, v30
-; NOOPT-NEXT: v_mov_b32_e32 v12, v29
-; NOOPT-NEXT: v_mov_b32_e32 v16, v28
+; NOOPT-NEXT: v_readlane_b32 s0, v31, 0
+; NOOPT-NEXT: v_readlane_b32 s1, v31, 1
+; NOOPT-NEXT: v_readlane_b32 s2, v31, 2
+; NOOPT-NEXT: v_readlane_b32 s3, v31, 3
+; NOOPT-NEXT: v_mov_b32_e32 v4, v18
+; NOOPT-NEXT: v_mov_b32_e32 v5, v17
+; NOOPT-NEXT: v_mov_b32_e32 v6, v16
+; NOOPT-NEXT: v_mov_b32_e32 v0, v15
+; NOOPT-NEXT: v_mov_b32_e32 v1, v22
+; NOOPT-NEXT: v_mov_b32_e32 v2, v21
+; NOOPT-NEXT: v_mov_b32_e32 v3, v20
+; NOOPT-NEXT: v_mov_b32_e32 v7, v19
+; NOOPT-NEXT: v_mov_b32_e32 v12, v26
+; NOOPT-NEXT: v_mov_b32_e32 v13, v25
+; NOOPT-NEXT: v_mov_b32_e32 v14, v24
+; NOOPT-NEXT: v_mov_b32_e32 v8, v23
+; NOOPT-NEXT: v_mov_b32_e32 v9, v30
+; NOOPT-NEXT: v_mov_b32_e32 v10, v29
+; NOOPT-NEXT: v_mov_b32_e32 v11, v28
+; NOOPT-NEXT: v_mov_b32_e32 v15, v27
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17_vgpr18_vgpr19 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v17, v12
-; NOOPT-NEXT: v_mov_b32_e32 v18, v11
-; NOOPT-NEXT: v_mov_b32_e32 v19, v10
-; NOOPT-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
+; NOOPT-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16_vgpr17_vgpr18 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v16, v11
+; NOOPT-NEXT: v_mov_b32_e32 v17, v10
+; NOOPT-NEXT: v_mov_b32_e32 v18, v9
+; NOOPT-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v10, v15
-; NOOPT-NEXT: v_mov_b32_e32 v11, v14
-; NOOPT-NEXT: v_mov_b32_e32 v12, v13
-; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:32
+; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v9, v14
+; NOOPT-NEXT: v_mov_b32_e32 v10, v13
+; NOOPT-NEXT: v_mov_b32_e32 v11, v12
+; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
+; NOOPT-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10 killed $exec
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v9, v4
-; NOOPT-NEXT: v_mov_b32_e32 v10, v3
-; NOOPT-NEXT: v_mov_b32_e32 v11, v2
-; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
+; NOOPT-NEXT: v_mov_b32_e32 v8, v3
+; NOOPT-NEXT: v_mov_b32_e32 v9, v2
+; NOOPT-NEXT: v_mov_b32_e32 v10, v1
+; NOOPT-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:16
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v2, v7
-; NOOPT-NEXT: v_mov_b32_e32 v3, v6
-; NOOPT-NEXT: v_mov_b32_e32 v4, v5
-; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0
-; NOOPT-NEXT: ; kill: killed $vgpr0
+; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v1, v6
+; NOOPT-NEXT: v_mov_b32_e32 v2, v5
+; NOOPT-NEXT: v_mov_b32_e32 v3, v4
+; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: insert_neg_inline_offset_vgpr:
@@ -5053,13 +5038,8 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1
; NOOPT-NEXT: s_mov_b32 s39, 0xe8f000
; NOOPT-NEXT: s_add_u32 s36, s36, s9
; NOOPT-NEXT: s_addc_u32 s37, s37, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; NOOPT-NEXT: s_mov_b64 s[0:1], s[2:3]
-; NOOPT-NEXT: v_mov_b32_e32 v1, v0
-; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:76 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:76 ; 4-byte Folded Spill
; NOOPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9
; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; NOOPT-NEXT: s_waitcnt lgkmcnt(0)
@@ -5071,32 +5051,32 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1
; NOOPT-NEXT: s_mov_b32 s5, s8
; NOOPT-NEXT: s_mov_b32 s6, s3
; NOOPT-NEXT: s_mov_b32 s7, s2
-; NOOPT-NEXT: s_waitcnt vmcnt(1)
-; NOOPT-NEXT: v_writelane_b32 v0, s4, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s5, 1
-; NOOPT-NEXT: v_writelane_b32 v0, s6, 2
-; NOOPT-NEXT: v_writelane_b32 v0, s7, 3
+; NOOPT-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v18, s4, 0
+; NOOPT-NEXT: v_writelane_b32 v18, s5, 1
+; NOOPT-NEXT: v_writelane_b32 v18, s6, 2
+; NOOPT-NEXT: v_writelane_b32 v18, s7, 3
; NOOPT-NEXT: s_mov_b32 s4, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s4, 4
+; NOOPT-NEXT: v_writelane_b32 v18, s4, 4
; NOOPT-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
; NOOPT-NEXT: s_mov_b32 s5, s2
; NOOPT-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
; NOOPT-NEXT: s_mov_b64 s[2:3], s[4:5]
; NOOPT-NEXT: s_mov_b32 s4, 2
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_lshlrev_b32_e64 v1, s4, v1
+; NOOPT-NEXT: v_lshlrev_b32_e64 v0, s4, v0
; NOOPT-NEXT: s_mov_b32 s4, 0
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: v_mov_b32_e32 v3, 0
-; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v2, v3
-; NOOPT-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 glc
+; NOOPT-NEXT: v_mov_b32_e32 v2, 0
+; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v1, v2
+; NOOPT-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:72 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:72 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b32 s0, 1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_add_i32_e64 v1, s[0:1], v1, s0
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:68 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_add_i32_e64 v0, s[0:1], v0, s0
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:68 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b32 s16, 16
; NOOPT-NEXT: s_mov_b32 s17, 15
; NOOPT-NEXT: s_mov_b32 s18, 14
@@ -5125,255 +5105,266 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1
; NOOPT-NEXT: s_mov_b32 s13, s18
; NOOPT-NEXT: s_mov_b32 s14, s17
; NOOPT-NEXT: s_mov_b32 s15, s16
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 5
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 6
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 7
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 8
-; NOOPT-NEXT: v_writelane_b32 v0, s4, 9
-; NOOPT-NEXT: v_writelane_b32 v0, s5, 10
-; NOOPT-NEXT: v_writelane_b32 v0, s6, 11
-; NOOPT-NEXT: v_writelane_b32 v0, s7, 12
-; NOOPT-NEXT: v_writelane_b32 v0, s8, 13
-; NOOPT-NEXT: v_writelane_b32 v0, s9, 14
-; NOOPT-NEXT: v_writelane_b32 v0, s10, 15
-; NOOPT-NEXT: v_writelane_b32 v0, s11, 16
-; NOOPT-NEXT: v_writelane_b32 v0, s12, 17
-; NOOPT-NEXT: v_writelane_b32 v0, s13, 18
-; NOOPT-NEXT: v_writelane_b32 v0, s14, 19
-; NOOPT-NEXT: v_writelane_b32 v0, s15, 20
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 5
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 6
+; NOOPT-NEXT: v_writelane_b32 v18, s2, 7
+; NOOPT-NEXT: v_writelane_b32 v18, s3, 8
+; NOOPT-NEXT: v_writelane_b32 v18, s4, 9
+; NOOPT-NEXT: v_writelane_b32 v18, s5, 10
+; NOOPT-NEXT: v_writelane_b32 v18, s6, 11
+; NOOPT-NEXT: v_writelane_b32 v18, s7, 12
+; NOOPT-NEXT: v_writelane_b32 v18, s8, 13
+; NOOPT-NEXT: v_writelane_b32 v18, s9, 14
+; NOOPT-NEXT: v_writelane_b32 v18, s10, 15
+; NOOPT-NEXT: v_writelane_b32 v18, s11, 16
+; NOOPT-NEXT: v_writelane_b32 v18, s12, 17
+; NOOPT-NEXT: v_writelane_b32 v18, s13, 18
+; NOOPT-NEXT: v_writelane_b32 v18, s14, 19
+; NOOPT-NEXT: v_writelane_b32 v18, s15, 20
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v16, s15
-; NOOPT-NEXT: v_mov_b32_e32 v15, s14
-; NOOPT-NEXT: v_mov_b32_e32 v14, s13
-; NOOPT-NEXT: v_mov_b32_e32 v13, s12
-; NOOPT-NEXT: v_mov_b32_e32 v12, s11
-; NOOPT-NEXT: v_mov_b32_e32 v11, s10
-; NOOPT-NEXT: v_mov_b32_e32 v10, s9
-; NOOPT-NEXT: v_mov_b32_e32 v9, s8
-; NOOPT-NEXT: v_mov_b32_e32 v8, s7
-; NOOPT-NEXT: v_mov_b32_e32 v7, s6
-; NOOPT-NEXT: v_mov_b32_e32 v6, s5
-; NOOPT-NEXT: v_mov_b32_e32 v5, s4
-; NOOPT-NEXT: v_mov_b32_e32 v4, s3
-; NOOPT-NEXT: v_mov_b32_e32 v3, s2
-; NOOPT-NEXT: v_mov_b32_e32 v2, s1
-; NOOPT-NEXT: v_mov_b32_e32 v1, s0
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:60 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[36:39], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_mov_b32_e32 v0, s0
+; NOOPT-NEXT: v_mov_b32_e32 v1, s1
+; NOOPT-NEXT: v_mov_b32_e32 v2, s2
+; NOOPT-NEXT: v_mov_b32_e32 v3, s3
+; NOOPT-NEXT: v_mov_b32_e32 v4, s4
+; NOOPT-NEXT: v_mov_b32_e32 v5, s5
+; NOOPT-NEXT: v_mov_b32_e32 v6, s6
+; NOOPT-NEXT: v_mov_b32_e32 v7, s7
+; NOOPT-NEXT: v_mov_b32_e32 v8, s8
+; NOOPT-NEXT: v_mov_b32_e32 v9, s9
+; NOOPT-NEXT: v_mov_b32_e32 v10, s10
+; NOOPT-NEXT: v_mov_b32_e32 v11, s11
+; NOOPT-NEXT: v_mov_b32_e32 v12, s12
+; NOOPT-NEXT: v_mov_b32_e32 v13, s13
+; NOOPT-NEXT: v_mov_b32_e32 v14, s14
+; NOOPT-NEXT: v_mov_b32_e32 v15, s15
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 21
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 22
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 21
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 22
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: ; implicit-def: $vgpr0
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1
+; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:80 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(1)
+; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(6)
+; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(5)
+; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(4)
+; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(3)
+; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:56 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(2)
+; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:60 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(1)
+; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:72 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 23
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 24
-; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 offset:80 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:8 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:28 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:32 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:36 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:40 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:44 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:48 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:52 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:56 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:60 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:72 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 23
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 24
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v16
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_mov_b32 m0, s2
-; NOOPT-NEXT: v_movrels_b32_e32 v1, v1
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:84 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:80 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movrels_b32_e32 v0, v0
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:84 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:80 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 23
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 24
+; NOOPT-NEXT: v_writelane_b32 v18, s2, 23
+; NOOPT-NEXT: v_writelane_b32 v18, s3, 24
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB16_1
; NOOPT-NEXT: ; %bb.2:
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 21
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 22
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 21
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 22
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.3:
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: ;;#ASMSTART
; NOOPT-NEXT: s_mov_b32 s4, 17
; NOOPT-NEXT: ;;#ASMEND
; NOOPT-NEXT: s_mov_b32 s16, s4
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 5
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 6
-; NOOPT-NEXT: v_readlane_b32 s2, v0, 7
-; NOOPT-NEXT: v_readlane_b32 s3, v0, 8
-; NOOPT-NEXT: v_readlane_b32 s4, v0, 9
-; NOOPT-NEXT: v_readlane_b32 s5, v0, 10
-; NOOPT-NEXT: v_readlane_b32 s6, v0, 11
-; NOOPT-NEXT: v_readlane_b32 s7, v0, 12
-; NOOPT-NEXT: v_readlane_b32 s8, v0, 13
-; NOOPT-NEXT: v_readlane_b32 s9, v0, 14
-; NOOPT-NEXT: v_readlane_b32 s10, v0, 15
-; NOOPT-NEXT: v_readlane_b32 s11, v0, 16
-; NOOPT-NEXT: v_readlane_b32 s12, v0, 17
-; NOOPT-NEXT: v_readlane_b32 s13, v0, 18
-; NOOPT-NEXT: v_readlane_b32 s14, v0, 19
-; NOOPT-NEXT: v_readlane_b32 s15, v0, 20
-; NOOPT-NEXT: v_writelane_b32 v0, s16, 25
-; NOOPT-NEXT: v_mov_b32_e32 v16, s15
-; NOOPT-NEXT: v_mov_b32_e32 v15, s14
-; NOOPT-NEXT: v_mov_b32_e32 v14, s13
-; NOOPT-NEXT: v_mov_b32_e32 v13, s12
-; NOOPT-NEXT: v_mov_b32_e32 v12, s11
-; NOOPT-NEXT: v_mov_b32_e32 v11, s10
-; NOOPT-NEXT: v_mov_b32_e32 v10, s9
-; NOOPT-NEXT: v_mov_b32_e32 v9, s8
-; NOOPT-NEXT: v_mov_b32_e32 v8, s7
-; NOOPT-NEXT: v_mov_b32_e32 v7, s6
-; NOOPT-NEXT: v_mov_b32_e32 v6, s5
-; NOOPT-NEXT: v_mov_b32_e32 v5, s4
-; NOOPT-NEXT: v_mov_b32_e32 v4, s3
-; NOOPT-NEXT: v_mov_b32_e32 v3, s2
-; NOOPT-NEXT: v_mov_b32_e32 v2, s1
-; NOOPT-NEXT: v_mov_b32_e32 v1, s0
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:88 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:92 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:96 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:100 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:104 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:108 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:112 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:116 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:120 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:124 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:128 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:132 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:136 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:140 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:144 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[36:39], 0 offset:148 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 5
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 6
+; NOOPT-NEXT: v_readlane_b32 s2, v18, 7
+; NOOPT-NEXT: v_readlane_b32 s3, v18, 8
+; NOOPT-NEXT: v_readlane_b32 s4, v18, 9
+; NOOPT-NEXT: v_readlane_b32 s5, v18, 10
+; NOOPT-NEXT: v_readlane_b32 s6, v18, 11
+; NOOPT-NEXT: v_readlane_b32 s7, v18, 12
+; NOOPT-NEXT: v_readlane_b32 s8, v18, 13
+; NOOPT-NEXT: v_readlane_b32 s9, v18, 14
+; NOOPT-NEXT: v_readlane_b32 s10, v18, 15
+; NOOPT-NEXT: v_readlane_b32 s11, v18, 16
+; NOOPT-NEXT: v_readlane_b32 s12, v18, 17
+; NOOPT-NEXT: v_readlane_b32 s13, v18, 18
+; NOOPT-NEXT: v_readlane_b32 s14, v18, 19
+; NOOPT-NEXT: v_readlane_b32 s15, v18, 20
+; NOOPT-NEXT: v_writelane_b32 v18, s16, 25
+; NOOPT-NEXT: v_mov_b32_e32 v0, s0
+; NOOPT-NEXT: v_mov_b32_e32 v1, s1
+; NOOPT-NEXT: v_mov_b32_e32 v2, s2
+; NOOPT-NEXT: v_mov_b32_e32 v3, s3
+; NOOPT-NEXT: v_mov_b32_e32 v4, s4
+; NOOPT-NEXT: v_mov_b32_e32 v5, s5
+; NOOPT-NEXT: v_mov_b32_e32 v6, s6
+; NOOPT-NEXT: v_mov_b32_e32 v7, s7
+; NOOPT-NEXT: v_mov_b32_e32 v8, s8
+; NOOPT-NEXT: v_mov_b32_e32 v9, s9
+; NOOPT-NEXT: v_mov_b32_e32 v10, s10
+; NOOPT-NEXT: v_mov_b32_e32 v11, s11
+; NOOPT-NEXT: v_mov_b32_e32 v12, s12
+; NOOPT-NEXT: v_mov_b32_e32 v13, s13
+; NOOPT-NEXT: v_mov_b32_e32 v14, s14
+; NOOPT-NEXT: v_mov_b32_e32 v15, s15
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:88 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:92 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:96 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:100 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:104 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:108 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:112 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:116 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:120 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:124 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:128 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:132 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:136 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:140 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:144 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:148 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 26
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 27
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 26
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 27
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: ; implicit-def: $vgpr0
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB16_4: ; =>This Inner Loop Header: Depth=1
+; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:152 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(1)
+; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:88 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:92 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:96 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:100 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:104 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:108 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:112 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:116 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:120 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:124 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(6)
+; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:128 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(5)
+; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:132 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(4)
+; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(3)
+; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:140 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(2)
+; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:144 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(1)
+; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:148 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:68 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 28
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 29
-; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 offset:152 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:88 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:92 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:96 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:100 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:104 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:108 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:112 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:116 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:120 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:124 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:128 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:132 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:136 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:140 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:144 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:148 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:68 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 28
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 29
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v16
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_mov_b32 m0, s2
-; NOOPT-NEXT: v_movrels_b32_e32 v1, v1
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:156 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:152 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movrels_b32_e32 v0, v0
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:156 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:152 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 28
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 29
+; NOOPT-NEXT: v_writelane_b32 v18, s2, 28
+; NOOPT-NEXT: v_writelane_b32 v18, s3, 29
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB16_4
; NOOPT-NEXT: ; %bb.5:
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 26
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 27
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 26
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 27
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.6:
+; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:76 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:156 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:84 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s4, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s5, v0, 1
-; NOOPT-NEXT: v_readlane_b32 s6, v0, 2
-; NOOPT-NEXT: v_readlane_b32 s7, v0, 3
-; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:76 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:156 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:84 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: buffer_store_dword v3, off, s[4:7], 0
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 4
+; NOOPT-NEXT: v_readlane_b32 s4, v18, 0
+; NOOPT-NEXT: v_readlane_b32 s5, v18, 1
+; NOOPT-NEXT: v_readlane_b32 s6, v18, 2
+; NOOPT-NEXT: v_readlane_b32 s7, v18, 3
; NOOPT-NEXT: buffer_store_dword v2, off, s[4:7], 0
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0
+; NOOPT-NEXT: buffer_store_dword v1, off, s[4:7], 0
+; NOOPT-NEXT: s_waitcnt vmcnt(0)
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 30
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 31
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 30
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 31
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
@@ -5381,10 +5372,10 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1
; NOOPT-NEXT: ; %bb.7: ; %bb1
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s4, v0, 25
+; NOOPT-NEXT: v_readlane_b32 s4, v18, 25
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s7, s1
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
@@ -5401,13 +5392,12 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1
; NOOPT-NEXT: .LBB16_8: ; %bb2
; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[28:29]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 30
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 31
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 30
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 31
; NOOPT-NEXT: s_or_b64 exec, exec, s[0:1]
-; NOOPT-NEXT: ; kill: killed $vgpr0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: extract_vgpr_offset_multiple_in_block:
@@ -5827,7 +5817,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: s_mov_b32 s31, 0xe8f000
; NOOPT-NEXT: s_add_u32 s28, s28, s9
; NOOPT-NEXT: s_addc_u32 s29, s29, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane
; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:84 ; 4-byte Folded Spill
; NOOPT-NEXT: s_load_dwordx2 s[18:19], s[2:3], 0x9
; NOOPT-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0xd
@@ -5841,12 +5830,13 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: s_mov_b32 s21, s24
; NOOPT-NEXT: s_mov_b32 s22, s19
; NOOPT-NEXT: s_mov_b32 s23, s18
-; NOOPT-NEXT: v_writelane_b32 v16, s20, 0
-; NOOPT-NEXT: v_writelane_b32 v16, s21, 1
-; NOOPT-NEXT: v_writelane_b32 v16, s22, 2
-; NOOPT-NEXT: v_writelane_b32 v16, s23, 3
+; NOOPT-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v32, s20, 0
+; NOOPT-NEXT: v_writelane_b32 v32, s21, 1
+; NOOPT-NEXT: v_writelane_b32 v32, s22, 2
+; NOOPT-NEXT: v_writelane_b32 v32, s23, 3
; NOOPT-NEXT: s_mov_b32 s20, 0
-; NOOPT-NEXT: v_writelane_b32 v16, s20, 4
+; NOOPT-NEXT: v_writelane_b32 v32, s20, 4
; NOOPT-NEXT: ; kill: def $sgpr20 killed $sgpr20 def $sgpr20_sgpr21
; NOOPT-NEXT: s_mov_b32 s21, s18
; NOOPT-NEXT: ; kill: def $sgpr16_sgpr17 killed $sgpr16_sgpr17 def $sgpr16_sgpr17_sgpr18_sgpr19
@@ -5890,115 +5880,113 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: v_mov_b32_e32 v14, s14
; NOOPT-NEXT: v_mov_b32_e32 v15, s15
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 5
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 6
+; NOOPT-NEXT: v_writelane_b32 v32, s0, 5
+; NOOPT-NEXT: v_writelane_b32 v32, s1, 6
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
-; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1
-; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 7
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 8
-; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(6)
-; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(5)
-; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:44 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(4)
-; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(3)
-; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(2)
-; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:80 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:80 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v18
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18
+; NOOPT-NEXT: v_readlane_b32 s0, v32, 7
+; NOOPT-NEXT: v_readlane_b32 s1, v32, 8
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_mov_b32 m0, s2
-; NOOPT-NEXT: v_movreld_b32_e32 v1, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:88 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:92 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:96 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:100 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:104 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:108 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:112 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:116 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:120 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:124 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:128 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:132 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:136 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movreld_b32_e32 v0, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:88 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:92 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:96 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:100 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:104 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:108 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:112 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:116 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:120 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:124 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:128 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:132 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:136 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 7
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 8
+; NOOPT-NEXT: v_writelane_b32 v32, s2, 7
+; NOOPT-NEXT: v_writelane_b32 v32, s3, 8
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB17_1
; NOOPT-NEXT: ; %bb.2:
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 5
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 6
+; NOOPT-NEXT: v_readlane_b32 s0, v32, 5
+; NOOPT-NEXT: v_readlane_b32 s1, v32, 6
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.3:
; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:88 ; 4-byte Folded Reload
@@ -6018,16 +6006,16 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:144 ; 4-byte Folded Reload
; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:148 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
-; NOOPT-NEXT: v_mov_b32_e32 v17, 63
-; NOOPT-NEXT: buffer_store_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_mov_b32_e32 v16, 63
+; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:216 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
; NOOPT-NEXT: s_waitcnt vmcnt(1)
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 9
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 10
+; NOOPT-NEXT: v_writelane_b32 v32, s0, 9
+; NOOPT-NEXT: v_writelane_b32 v32, s1, 10
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill
@@ -6047,193 +6035,186 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB17_4: ; =>This Inner Loop Header: Depth=1
-; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 11
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 12
-; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:164 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:168 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:164 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:168 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(6)
-; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(5)
-; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(4)
-; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(3)
-; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(2)
-; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:76 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:76 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v18
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18
+; NOOPT-NEXT: v_readlane_b32 s0, v32, 11
+; NOOPT-NEXT: v_readlane_b32 s1, v32, 12
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_mov_b32 m0, s2
-; NOOPT-NEXT: v_movreld_b32_e32 v1, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:220 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:224 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:228 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:232 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:236 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:240 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:244 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:248 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:252 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:256 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:260 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:264 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:268 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:164 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:168 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movreld_b32_e32 v0, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:220 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:224 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:228 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:232 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:236 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:240 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:244 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:248 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:252 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:256 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:260 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:264 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:268 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:164 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:168 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:172 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:176 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:180 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:184 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:188 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:192 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:196 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:200 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:204 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:208 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 11
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 12
+; NOOPT-NEXT: v_writelane_b32 v32, s2, 11
+; NOOPT-NEXT: v_writelane_b32 v32, s3, 12
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB17_4
; NOOPT-NEXT: ; %bb.5:
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 9
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 10
+; NOOPT-NEXT: v_readlane_b32 s0, v32, 9
+; NOOPT-NEXT: v_readlane_b32 s1, v32, 10
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.6:
+; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:84 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:220 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:224 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:228 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v19, off, s[28:31], 0 offset:232 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v20, off, s[28:31], 0 offset:236 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v21, off, s[28:31], 0 offset:240 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v22, off, s[28:31], 0 offset:244 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v23, off, s[28:31], 0 offset:248 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v24, off, s[28:31], 0 offset:252 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v25, off, s[28:31], 0 offset:256 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v26, off, s[28:31], 0 offset:260 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v27, off, s[28:31], 0 offset:264 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v28, off, s[28:31], 0 offset:268 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v29, off, s[28:31], 0 offset:272 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v30, off, s[28:31], 0 offset:276 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[28:31], 0 offset:280 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s4, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s5, v0, 1
-; NOOPT-NEXT: v_readlane_b32 s6, v0, 2
-; NOOPT-NEXT: v_readlane_b32 s7, v0, 3
-; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:84 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:220 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:224 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v19, off, s[28:31], 0 offset:228 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v20, off, s[28:31], 0 offset:232 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v21, off, s[28:31], 0 offset:236 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v22, off, s[28:31], 0 offset:240 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v23, off, s[28:31], 0 offset:244 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v24, off, s[28:31], 0 offset:248 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v25, off, s[28:31], 0 offset:252 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v26, off, s[28:31], 0 offset:256 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v27, off, s[28:31], 0 offset:260 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v28, off, s[28:31], 0 offset:264 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v29, off, s[28:31], 0 offset:268 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v30, off, s[28:31], 0 offset:272 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v31, off, s[28:31], 0 offset:276 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 offset:280 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(12)
-; NOOPT-NEXT: v_mov_b32_e32 v6, v20
-; NOOPT-NEXT: v_mov_b32_e32 v7, v19
-; NOOPT-NEXT: v_mov_b32_e32 v8, v18
-; NOOPT-NEXT: v_mov_b32_e32 v2, v17
-; NOOPT-NEXT: s_waitcnt vmcnt(8)
-; NOOPT-NEXT: v_mov_b32_e32 v3, v24
-; NOOPT-NEXT: v_mov_b32_e32 v4, v23
-; NOOPT-NEXT: v_mov_b32_e32 v5, v22
-; NOOPT-NEXT: v_mov_b32_e32 v9, v21
-; NOOPT-NEXT: s_waitcnt vmcnt(4)
-; NOOPT-NEXT: v_mov_b32_e32 v14, v28
-; NOOPT-NEXT: v_mov_b32_e32 v15, v27
-; NOOPT-NEXT: v_mov_b32_e32 v16, v26
-; NOOPT-NEXT: v_mov_b32_e32 v10, v25
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v11, v32
-; NOOPT-NEXT: v_mov_b32_e32 v12, v31
-; NOOPT-NEXT: v_mov_b32_e32 v13, v30
-; NOOPT-NEXT: v_mov_b32_e32 v17, v29
+; NOOPT-NEXT: v_readlane_b32 s0, v32, 4
+; NOOPT-NEXT: v_readlane_b32 s4, v32, 0
+; NOOPT-NEXT: v_readlane_b32 s5, v32, 1
+; NOOPT-NEXT: v_readlane_b32 s6, v32, 2
+; NOOPT-NEXT: v_readlane_b32 s7, v32, 3
+; NOOPT-NEXT: v_mov_b32_e32 v5, v19
+; NOOPT-NEXT: v_mov_b32_e32 v6, v18
+; NOOPT-NEXT: v_mov_b32_e32 v7, v17
+; NOOPT-NEXT: v_mov_b32_e32 v1, v16
+; NOOPT-NEXT: v_mov_b32_e32 v2, v23
+; NOOPT-NEXT: v_mov_b32_e32 v3, v22
+; NOOPT-NEXT: v_mov_b32_e32 v4, v21
+; NOOPT-NEXT: v_mov_b32_e32 v8, v20
+; NOOPT-NEXT: v_mov_b32_e32 v13, v27
+; NOOPT-NEXT: v_mov_b32_e32 v14, v26
+; NOOPT-NEXT: v_mov_b32_e32 v15, v25
+; NOOPT-NEXT: v_mov_b32_e32 v9, v24
+; NOOPT-NEXT: v_mov_b32_e32 v10, v31
+; NOOPT-NEXT: v_mov_b32_e32 v11, v30
+; NOOPT-NEXT: v_mov_b32_e32 v12, v29
+; NOOPT-NEXT: v_mov_b32_e32 v16, v28
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
-; NOOPT-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18_vgpr19_vgpr20 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v18, v13
-; NOOPT-NEXT: v_mov_b32_e32 v19, v12
-; NOOPT-NEXT: v_mov_b32_e32 v20, v11
-; NOOPT-NEXT: buffer_store_dwordx4 v[17:20], off, s[4:7], 0 offset:48
+; NOOPT-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17_vgpr18_vgpr19 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v17, v12
+; NOOPT-NEXT: v_mov_b32_e32 v18, v11
+; NOOPT-NEXT: v_mov_b32_e32 v19, v10
+; NOOPT-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:48
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
-; NOOPT-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11_vgpr12_vgpr13 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v11, v16
-; NOOPT-NEXT: v_mov_b32_e32 v12, v15
-; NOOPT-NEXT: v_mov_b32_e32 v13, v14
-; NOOPT-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0 offset:32
+; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v10, v15
+; NOOPT-NEXT: v_mov_b32_e32 v11, v14
+; NOOPT-NEXT: v_mov_b32_e32 v12, v13
+; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 offset:32
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
-; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec
+; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v10, v5
-; NOOPT-NEXT: v_mov_b32_e32 v11, v4
-; NOOPT-NEXT: v_mov_b32_e32 v12, v3
-; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 offset:16
+; NOOPT-NEXT: v_mov_b32_e32 v9, v4
+; NOOPT-NEXT: v_mov_b32_e32 v10, v3
+; NOOPT-NEXT: v_mov_b32_e32 v11, v2
+; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: ; implicit-def: $sgpr1
-; NOOPT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v3, v8
-; NOOPT-NEXT: v_mov_b32_e32 v4, v7
-; NOOPT-NEXT: v_mov_b32_e32 v5, v6
-; NOOPT-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0
+; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v2, v7
+; NOOPT-NEXT: v_mov_b32_e32 v3, v6
+; NOOPT-NEXT: v_mov_b32_e32 v4, v5
+; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[4:7], 0
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 13
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 14
+; NOOPT-NEXT: v_writelane_b32 v32, s0, 13
+; NOOPT-NEXT: v_writelane_b32 v32, s1, 14
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execz .LBB17_8
; NOOPT-NEXT: ; %bb.7: ; %bb1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:68 ; 4-byte Folded Reload
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s6, s1
@@ -6251,13 +6232,12 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1)
; NOOPT-NEXT: .LBB17_8: ; %bb2
; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[26:27]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 13
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 14
+; NOOPT-NEXT: v_readlane_b32 s0, v32, 13
+; NOOPT-NEXT: v_readlane_b32 s1, v32, 14
; NOOPT-NEXT: s_or_b64 exec, exec, s[0:1]
-; NOOPT-NEXT: ; kill: killed $vgpr0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: insert_vgpr_offset_multiple_in_block:
@@ -7279,28 +7259,28 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: s_mov_b32 s15, 0xe8f000
; NOOPT-NEXT: s_add_u32 s12, s12, s9
; NOOPT-NEXT: s_addc_u32 s13, s13, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; NOOPT-NEXT: s_load_dword s2, s[2:3], 0x9
; NOOPT-NEXT: s_mov_b64 s[0:1], -1
; NOOPT-NEXT: ; implicit-def: $sgpr3
; NOOPT-NEXT: s_mov_b32 s3, 0
; NOOPT-NEXT: s_waitcnt lgkmcnt(0)
; NOOPT-NEXT: s_cmp_lg_u32 s2, s3
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 1
+; NOOPT-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v4, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v4, s1, 1
; NOOPT-NEXT: s_mov_b64 s[8:9], exec
; NOOPT-NEXT: s_mov_b64 exec, -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: s_cbranch_scc1 .LBB19_3
; NOOPT-NEXT: .LBB19_1: ; %Flow
; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 1
+; NOOPT-NEXT: v_readlane_b32 s0, v4, 0
+; NOOPT-NEXT: v_readlane_b32 s1, v4, 1
; NOOPT-NEXT: ; implicit-def: $sgpr2
; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; NOOPT-NEXT: s_mov_b32 s0, 1
@@ -7330,7 +7310,7 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: .LBB19_3: ; %bb4
; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s6, s1
@@ -7342,24 +7322,21 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: s_mov_b32 s1, s6
; NOOPT-NEXT: s_mov_b32 s2, s5
; NOOPT-NEXT: s_mov_b32 s3, s4
-; NOOPT-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0 glc
+; NOOPT-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ; implicit-def: $sgpr0
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ;;#ASMSTART
-; NOOPT-NEXT: ; reg use v[1:4]
+; NOOPT-NEXT: ; reg use v[0:3]
; NOOPT-NEXT: ;;#ASMEND
; NOOPT-NEXT: s_mov_b64 s[0:1], 0
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 1
+; NOOPT-NEXT: v_writelane_b32 v4, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v4, s1, 1
; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: s_branch .LBB19_1
; NOOPT-NEXT: .LBB19_4: ; %bb7
-; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[8:9]
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s7, s1
@@ -7371,10 +7348,9 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) {
; NOOPT-NEXT: s_mov_b32 s1, s7
; NOOPT-NEXT: s_mov_b32 s2, s6
; NOOPT-NEXT: s_mov_b32 s3, s5
-; NOOPT-NEXT: v_mov_b32_e32 v1, s4
-; NOOPT-NEXT: buffer_store_dword v1, off, s[0:3], 0
+; NOOPT-NEXT: v_mov_b32_e32 v0, s4
+; NOOPT-NEXT: buffer_store_dword v0, off, s[0:3], 0
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: ; kill: killed $vgpr0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: extract_adjacent_blocks:
@@ -7525,7 +7501,6 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: s_mov_b32 s19, 0xe8f000
; NOOPT-NEXT: s_add_u32 s16, s16, s9
; NOOPT-NEXT: s_addc_u32 s17, s17, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; NOOPT-NEXT: s_mov_b64 s[0:1], s[2:3]
; NOOPT-NEXT: s_load_dword s2, s[0:1], 0x9
; NOOPT-NEXT: s_load_dword s0, s[0:1], 0xa
@@ -7534,21 +7509,22 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: s_mov_b32 s3, 0
; NOOPT-NEXT: s_cmp_lg_u32 s2, s3
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 1
+; NOOPT-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v4, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v4, s1, 1
; NOOPT-NEXT: s_mov_b64 s[12:13], exec
; NOOPT-NEXT: s_mov_b64 exec, -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_cbranch_scc1 .LBB20_3
; NOOPT-NEXT: .LBB20_1: ; %Flow
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 0
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 1
+; NOOPT-NEXT: v_readlane_b32 s0, v4, 0
+; NOOPT-NEXT: v_readlane_b32 s1, v4, 1
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; NOOPT-NEXT: s_mov_b32 s0, 1
@@ -7579,7 +7555,7 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: .LBB20_3: ; %bb4
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s6, s1
@@ -7591,25 +7567,22 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: s_mov_b32 s1, s6
; NOOPT-NEXT: s_mov_b32 s2, s5
; NOOPT-NEXT: s_mov_b32 s3, s4
-; NOOPT-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0 glc
+; NOOPT-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3
-; NOOPT-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
+; NOOPT-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: ;;#ASMSTART
-; NOOPT-NEXT: ; reg use v[1:4]
+; NOOPT-NEXT: ; reg use v[0:3]
; NOOPT-NEXT: ;;#ASMEND
; NOOPT-NEXT: s_mov_b64 s[0:1], 0
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 0
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 1
+; NOOPT-NEXT: v_writelane_b32 v4, s0, 0
+; NOOPT-NEXT: v_writelane_b32 v4, s1, 1
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_branch .LBB20_1
; NOOPT-NEXT: .LBB20_4: ; %bb7
-; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: s_mov_b32 s10, s1
@@ -7621,13 +7594,12 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) {
; NOOPT-NEXT: s_mov_b32 s1, s10
; NOOPT-NEXT: s_mov_b32 s2, s9
; NOOPT-NEXT: s_mov_b32 s3, s8
-; NOOPT-NEXT: v_mov_b32_e32 v1, s4
-; NOOPT-NEXT: v_mov_b32_e32 v2, s5
-; NOOPT-NEXT: v_mov_b32_e32 v3, s6
-; NOOPT-NEXT: v_mov_b32_e32 v4, s7
-; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0
+; NOOPT-NEXT: v_mov_b32_e32 v0, s4
+; NOOPT-NEXT: v_mov_b32_e32 v1, s5
+; NOOPT-NEXT: v_mov_b32_e32 v2, s6
+; NOOPT-NEXT: v_mov_b32_e32 v3, s7
+; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: ; kill: killed $vgpr0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: insert_adjacent_blocks:
@@ -9084,49 +9056,48 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
; NOOPT-NEXT: s_mov_b32 s27, 0xe8f000
; NOOPT-NEXT: s_add_u32 s24, s24, s9
; NOOPT-NEXT: s_addc_u32 s25, s25, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; NOOPT-NEXT: s_load_dword s1, s[2:3], 0x9
; NOOPT-NEXT: s_load_dword s0, s[2:3], 0xa
+; NOOPT-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane
; NOOPT-NEXT: s_waitcnt lgkmcnt(0)
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 0
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 0
; NOOPT-NEXT: s_mov_b32 s1, 8
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 1
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 1
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
-; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: v_mov_b32_e32 v0, 8
-; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
; NOOPT-NEXT: .LBB26_1: ; %bb2
; NOOPT-NEXT: ; =>This Loop Header: Depth=1
; NOOPT-NEXT: ; Child Loop BB26_3 Depth 2
-; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
+; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s2, v0, 0
-; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: v_readlane_b32 s2, v18, 0
; NOOPT-NEXT: s_mov_b64 s[0:1], -1
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_cmp_ge_i32_e64 s[2:3], v1, s2
-; NOOPT-NEXT: v_mov_b32_e32 v1, s4
+; NOOPT-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, s2
+; NOOPT-NEXT: v_mov_b32_e32 v0, s4
; NOOPT-NEXT: s_and_b64 vcc, exec, s[2:3]
-; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 2
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 3
+; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 2
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 3
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_cbranch_vccnz .LBB26_6
; NOOPT-NEXT: ; %bb.2: ; %bb4
; NOOPT-NEXT: ; in Loop: Header=BB26_1 Depth=1
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_waitcnt expcnt(0)
+; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v16, 1
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 1
; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3
; NOOPT-NEXT: ; kill: def $sgpr3 killed $sgpr3 killed $sgpr2_sgpr3
; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5
@@ -9137,7 +9108,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
; NOOPT-NEXT: s_mov_b32 s5, s3
; NOOPT-NEXT: s_mov_b32 s6, s2
; NOOPT-NEXT: s_mov_b32 s7, s1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc
; NOOPT-NEXT: s_waitcnt vmcnt(0)
; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:80 ; 4-byte Folded Spill
@@ -9159,13 +9129,13 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
; NOOPT-NEXT: v_mov_b32_e32 v13, s17
; NOOPT-NEXT: v_mov_b32_e32 v14, s18
; NOOPT-NEXT: v_mov_b32_e32 v15, s19
-; NOOPT-NEXT: v_mov_b32_e32 v17, s0
-; NOOPT-NEXT: buffer_store_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_mov_b32_e32 v16, s0
+; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:76 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 4
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 5
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 4
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 5
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill
@@ -9186,146 +9156,139 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) {
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB26_3: ; Parent Loop BB26_1 Depth=1
; NOOPT-NEXT: ; => This Inner Loop Header: Depth=2
-; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 6
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 7
-; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:28 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(6)
-; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(5)
-; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(4)
-; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(3)
-; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(2)
-; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 offset:80 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:80 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
+; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v18
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 6
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 7
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_mov_b32 m0, s2
-; NOOPT-NEXT: v_movreld_b32_e32 v1, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:84 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:88 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:92 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:96 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:100 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:104 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:108 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:112 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:116 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:120 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:124 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:132 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:72 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movreld_b32_e32 v0, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:84 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:88 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:92 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:96 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:100 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:104 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:108 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:112 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:116 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:120 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:124 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:132 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:68 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:72 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 6
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 7
+; NOOPT-NEXT: v_writelane_b32 v18, s2, 6
+; NOOPT-NEXT: v_writelane_b32 v18, s3, 7
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB26_3
; NOOPT-NEXT: ; %bb.4: ; in Loop: Header=BB26_1 Depth=1
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 5
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 4
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 5
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.5: ; in Loop: Header=BB26_1 Depth=1
+; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:84 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:88 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:92 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:96 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:100 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:104 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:108 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:112 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:116 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:120 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:124 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:132 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:140 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:144 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
-; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:84 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:88 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:92 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:96 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:100 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:104 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:108 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:112 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:116 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:120 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:124 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:132 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 s[0:1], 0
; NOOPT-NEXT: s_waitcnt vmcnt(14)
-; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: v_writelane_b32 v0, s0, 2
-; NOOPT-NEXT: v_writelane_b32 v0, s1, 3
+; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: s_waitcnt vmcnt(1)
+; NOOPT-NEXT: v_writelane_b32 v18, s0, 2
+; NOOPT-NEXT: v_writelane_b32 v18, s1, 3
; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: .LBB26_6: ; %Flow
; NOOPT-NEXT: ; in Loop: Header=BB26_1 Depth=1
-; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
+; NOOPT-NEXT: s_waitcnt expcnt(0)
+; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v1, 2
-; NOOPT-NEXT: v_readlane_b32 s1, v1, 3
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: v_readlane_b32 s0, v18, 2
+; NOOPT-NEXT: v_readlane_b32 s1, v18, 3
; NOOPT-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; NOOPT-NEXT: s_mov_b32 s0, 1
; NOOPT-NEXT: ; implicit-def: $sgpr1
; NOOPT-NEXT: v_cmp_ne_u32_e64 s[0:1], v1, s0
; NOOPT-NEXT: s_and_b64 vcc, exec, s[0:1]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill
; NOOPT-NEXT: s_cbranch_vccnz .LBB26_1
; NOOPT-NEXT: ; %bb.7: ; %bb8
-; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[20:21]
-; NOOPT-NEXT: ; kill: killed $vgpr0
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: broken_phi_bb:
@@ -9570,13 +9533,13 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace
; NOOPT-NEXT: s_mov_b32 s19, 0xe8f000
; NOOPT-NEXT: s_add_u32 s16, s16, s5
; NOOPT-NEXT: s_addc_u32 s17, s17, 0
-; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane
-; NOOPT-NEXT: v_writelane_b32 v16, s4, 0
+; NOOPT-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
+; NOOPT-NEXT: v_writelane_b32 v33, s4, 0
; NOOPT-NEXT: s_mov_b32 s4, s1
-; NOOPT-NEXT: v_readlane_b32 s1, v16, 0
-; NOOPT-NEXT: v_writelane_b32 v16, s4, 1
+; NOOPT-NEXT: v_readlane_b32 s1, v33, 0
+; NOOPT-NEXT: v_writelane_b32 v33, s4, 1
; NOOPT-NEXT: s_mov_b32 s4, s0
-; NOOPT-NEXT: v_readlane_b32 s0, v16, 1
+; NOOPT-NEXT: v_readlane_b32 s0, v33, 1
; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:144 ; 4-byte Folded Spill
; NOOPT-NEXT: v_mov_b32_e32 v2, v1
; NOOPT-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
@@ -9591,17 +9554,17 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace
; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:140 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b32 s8, 0xf000
; NOOPT-NEXT: s_mov_b32 s0, 0
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 2
+; NOOPT-NEXT: v_writelane_b32 v33, s0, 2
; NOOPT-NEXT: s_mov_b32 s2, s0
; NOOPT-NEXT: s_mov_b32 s3, s8
; NOOPT-NEXT: s_mov_b32 s8, s0
; NOOPT-NEXT: s_mov_b32 s9, s0
; NOOPT-NEXT: ; kill: def $sgpr8_sgpr9 killed $sgpr8_sgpr9 def $sgpr8_sgpr9_sgpr10_sgpr11
; NOOPT-NEXT: s_mov_b64 s[10:11], s[2:3]
-; NOOPT-NEXT: v_writelane_b32 v16, s8, 3
-; NOOPT-NEXT: v_writelane_b32 v16, s9, 4
-; NOOPT-NEXT: v_writelane_b32 v16, s10, 5
-; NOOPT-NEXT: v_writelane_b32 v16, s11, 6
+; NOOPT-NEXT: v_writelane_b32 v33, s8, 3
+; NOOPT-NEXT: v_writelane_b32 v33, s9, 4
+; NOOPT-NEXT: v_writelane_b32 v33, s10, 5
+; NOOPT-NEXT: v_writelane_b32 v33, s11, 6
; NOOPT-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10_sgpr11 killed $sgpr4_sgpr5_sgpr6_sgpr7
; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3
; NOOPT-NEXT: s_waitcnt expcnt(1)
@@ -9611,7 +9574,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace
; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:132 ; 4-byte Folded Spill
; NOOPT-NEXT: s_waitcnt expcnt(0)
; NOOPT-NEXT: v_mov_b32_e32 v0, s0
-; NOOPT-NEXT: v_mov_b32_e32 v31, s0
; NOOPT-NEXT: v_mov_b32_e32 v30, s0
; NOOPT-NEXT: v_mov_b32_e32 v29, s0
; NOOPT-NEXT: v_mov_b32_e32 v28, s0
@@ -9626,22 +9588,23 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace
; NOOPT-NEXT: v_mov_b32_e32 v19, s0
; NOOPT-NEXT: v_mov_b32_e32 v18, s0
; NOOPT-NEXT: v_mov_b32_e32 v17, s0
+; NOOPT-NEXT: v_mov_b32_e32 v16, s0
; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v1, v31
-; NOOPT-NEXT: v_mov_b32_e32 v2, v30
-; NOOPT-NEXT: v_mov_b32_e32 v3, v29
-; NOOPT-NEXT: v_mov_b32_e32 v4, v28
-; NOOPT-NEXT: v_mov_b32_e32 v5, v27
-; NOOPT-NEXT: v_mov_b32_e32 v6, v26
-; NOOPT-NEXT: v_mov_b32_e32 v7, v25
-; NOOPT-NEXT: v_mov_b32_e32 v8, v24
-; NOOPT-NEXT: v_mov_b32_e32 v9, v23
-; NOOPT-NEXT: v_mov_b32_e32 v10, v22
-; NOOPT-NEXT: v_mov_b32_e32 v11, v21
-; NOOPT-NEXT: v_mov_b32_e32 v12, v20
-; NOOPT-NEXT: v_mov_b32_e32 v13, v19
-; NOOPT-NEXT: v_mov_b32_e32 v14, v18
-; NOOPT-NEXT: v_mov_b32_e32 v15, v17
+; NOOPT-NEXT: v_mov_b32_e32 v1, v30
+; NOOPT-NEXT: v_mov_b32_e32 v2, v29
+; NOOPT-NEXT: v_mov_b32_e32 v3, v28
+; NOOPT-NEXT: v_mov_b32_e32 v4, v27
+; NOOPT-NEXT: v_mov_b32_e32 v5, v26
+; NOOPT-NEXT: v_mov_b32_e32 v6, v25
+; NOOPT-NEXT: v_mov_b32_e32 v7, v24
+; NOOPT-NEXT: v_mov_b32_e32 v8, v23
+; NOOPT-NEXT: v_mov_b32_e32 v9, v22
+; NOOPT-NEXT: v_mov_b32_e32 v10, v21
+; NOOPT-NEXT: v_mov_b32_e32 v11, v20
+; NOOPT-NEXT: v_mov_b32_e32 v12, v19
+; NOOPT-NEXT: v_mov_b32_e32 v13, v18
+; NOOPT-NEXT: v_mov_b32_e32 v14, v17
+; NOOPT-NEXT: v_mov_b32_e32 v15, v16
; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:68 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:72 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:76 ; 4-byte Folded Spill
@@ -9659,207 +9622,200 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace
; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:124 ; 4-byte Folded Spill
; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:128 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[0:1], exec
-; NOOPT-NEXT: v_writelane_b32 v16, s0, 7
-; NOOPT-NEXT: v_writelane_b32 v16, s1, 8
+; NOOPT-NEXT: v_writelane_b32 v33, s0, 7
+; NOOPT-NEXT: v_writelane_b32 v33, s1, 8
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
-; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v33, off, s[16:19], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
-; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1
; NOOPT-NEXT: .LBB27_1: ; =>This Inner Loop Header: Depth=1
-; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
-; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 9
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 10
-; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(6)
-; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(5)
-; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(4)
-; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:48 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(3)
-; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(2)
-; NOOPT-NEXT: buffer_load_dword v13, off, s[16:19], 0 offset:48 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v13, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(1)
-; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[16:19], 0 offset:132 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:132 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
+; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload
+; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readfirstlane_b32 s2, v18
-; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18
+; NOOPT-NEXT: v_readlane_b32 s0, v33, 9
+; NOOPT-NEXT: v_readlane_b32 s1, v33, 10
+; NOOPT-NEXT: v_readfirstlane_b32 s2, v17
+; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17
; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1]
; NOOPT-NEXT: s_mov_b32 m0, s2
-; NOOPT-NEXT: v_movreld_b32_e32 v2, v17
-; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:148 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:152 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:156 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:160 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:164 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:168 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:172 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:176 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:180 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:184 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:188 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:192 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:196 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill
-; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: v_movreld_b32_e32 v1, v16
+; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:148 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:152 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:156 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:160 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:164 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:168 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:172 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:176 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:180 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:184 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:188 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:192 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:196 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1]
-; NOOPT-NEXT: v_writelane_b32 v0, s2, 9
-; NOOPT-NEXT: v_writelane_b32 v0, s3, 10
+; NOOPT-NEXT: v_writelane_b32 v33, s2, 9
+; NOOPT-NEXT: v_writelane_b32 v33, s3, 10
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
-; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill
+; NOOPT-NEXT: buffer_store_dword v33, off, s[16:19], 0 ; 4-byte Folded Spill
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1]
; NOOPT-NEXT: s_cbranch_execnz .LBB27_1
; NOOPT-NEXT: ; %bb.2:
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 7
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 8
+; NOOPT-NEXT: v_readlane_b32 s0, v33, 7
+; NOOPT-NEXT: v_readlane_b32 s1, v33, 8
; NOOPT-NEXT: s_mov_b64 exec, s[0:1]
; NOOPT-NEXT: ; %bb.3:
+; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:136 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:140 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:148 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v18, off, s[16:19], 0 offset:152 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v19, off, s[16:19], 0 offset:156 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v20, off, s[16:19], 0 offset:160 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v21, off, s[16:19], 0 offset:164 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v22, off, s[16:19], 0 offset:168 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v23, off, s[16:19], 0 offset:172 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v24, off, s[16:19], 0 offset:176 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v25, off, s[16:19], 0 offset:180 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v26, off, s[16:19], 0 offset:184 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v27, off, s[16:19], 0 offset:188 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v28, off, s[16:19], 0 offset:192 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v29, off, s[16:19], 0 offset:196 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v30, off, s[16:19], 0 offset:200 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v31, off, s[16:19], 0 offset:204 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v32, off, s[16:19], 0 offset:208 ; 4-byte Folded Reload
; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1
-; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload
+; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload
; NOOPT-NEXT: s_mov_b64 exec, s[12:13]
; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_readlane_b32 s0, v0, 3
-; NOOPT-NEXT: v_readlane_b32 s1, v0, 4
-; NOOPT-NEXT: v_readlane_b32 s2, v0, 5
-; NOOPT-NEXT: v_readlane_b32 s3, v0, 6
-; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:136 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:140 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v18, off, s[16:19], 0 offset:148 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v19, off, s[16:19], 0 offset:152 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v20, off, s[16:19], 0 offset:156 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v21, off, s[16:19], 0 offset:160 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v22, off, s[16:19], 0 offset:164 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v23, off, s[16:19], 0 offset:168 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v24, off, s[16:19], 0 offset:172 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v25, off, s[16:19], 0 offset:176 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v26, off, s[16:19], 0 offset:180 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v27, off, s[16:19], 0 offset:184 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v28, off, s[16:19], 0 offset:188 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v29, off, s[16:19], 0 offset:192 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v30, off, s[16:19], 0 offset:196 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v31, off, s[16:19], 0 offset:200 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v32, off, s[16:19], 0 offset:204 ; 4-byte Folded Reload
-; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 offset:208 ; 4-byte Folded Reload
-; NOOPT-NEXT: s_waitcnt vmcnt(12)
-; NOOPT-NEXT: v_mov_b32_e32 v7, v21
-; NOOPT-NEXT: v_mov_b32_e32 v8, v20
-; NOOPT-NEXT: v_mov_b32_e32 v9, v19
-; NOOPT-NEXT: v_mov_b32_e32 v1, v18
-; NOOPT-NEXT: s_waitcnt vmcnt(8)
-; NOOPT-NEXT: v_mov_b32_e32 v2, v25
-; NOOPT-NEXT: v_mov_b32_e32 v3, v24
-; NOOPT-NEXT: v_mov_b32_e32 v4, v23
-; NOOPT-NEXT: v_mov_b32_e32 v10, v22
-; NOOPT-NEXT: s_waitcnt vmcnt(4)
-; NOOPT-NEXT: v_mov_b32_e32 v15, v29
-; NOOPT-NEXT: v_mov_b32_e32 v16, v28
-; NOOPT-NEXT: v_mov_b32_e32 v17, v27
-; NOOPT-NEXT: v_mov_b32_e32 v11, v26
-; NOOPT-NEXT: s_waitcnt vmcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v12, v33
-; NOOPT-NEXT: v_mov_b32_e32 v13, v32
-; NOOPT-NEXT: v_mov_b32_e32 v14, v31
-; NOOPT-NEXT: v_mov_b32_e32 v18, v30
+; NOOPT-NEXT: v_readlane_b32 s0, v33, 3
+; NOOPT-NEXT: v_readlane_b32 s1, v33, 4
+; NOOPT-NEXT: v_readlane_b32 s2, v33, 5
+; NOOPT-NEXT: v_readlane_b32 s3, v33, 6
+; NOOPT-NEXT: v_mov_b32_e32 v6, v20
+; NOOPT-NEXT: v_mov_b32_e32 v7, v19
+; NOOPT-NEXT: v_mov_b32_e32 v8, v18
+; NOOPT-NEXT: v_mov_b32_e32 v0, v17
+; NOOPT-NEXT: v_mov_b32_e32 v1, v24
+; NOOPT-NEXT: v_mov_b32_e32 v2, v23
+; NOOPT-NEXT: v_mov_b32_e32 v3, v22
+; NOOPT-NEXT: v_mov_b32_e32 v9, v21
+; NOOPT-NEXT: v_mov_b32_e32 v14, v28
+; NOOPT-NEXT: v_mov_b32_e32 v15, v27
+; NOOPT-NEXT: v_mov_b32_e32 v16, v26
+; NOOPT-NEXT: v_mov_b32_e32 v10, v25
+; NOOPT-NEXT: v_mov_b32_e32 v11, v32
+; NOOPT-NEXT: v_mov_b32_e32 v12, v31
+; NOOPT-NEXT: v_mov_b32_e32 v13, v30
+; NOOPT-NEXT: v_mov_b32_e32 v17, v29
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19_vgpr20_vgpr21 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v19, v14
-; NOOPT-NEXT: v_mov_b32_e32 v20, v13
-; NOOPT-NEXT: v_mov_b32_e32 v21, v12
-; NOOPT-NEXT: v_mov_b32_e32 v13, v6
+; NOOPT-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18_vgpr19_vgpr20 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v18, v13
+; NOOPT-NEXT: v_mov_b32_e32 v19, v12
+; NOOPT-NEXT: v_mov_b32_e32 v20, v11
; NOOPT-NEXT: v_mov_b32_e32 v12, v5
-; NOOPT-NEXT: buffer_store_dwordx4 v[18:21], v[12:13], s[0:3], 0 addr64 offset:48
+; NOOPT-NEXT: v_mov_b32_e32 v11, v4
+; NOOPT-NEXT: buffer_store_dwordx4 v[17:20], v[11:12], s[0:3], 0 addr64 offset:48
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12_vgpr13_vgpr14 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v12, v17
-; NOOPT-NEXT: v_mov_b32_e32 v13, v16
-; NOOPT-NEXT: v_mov_b32_e32 v14, v15
-; NOOPT-NEXT: v_mov_b32_e32 v16, v6
+; NOOPT-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11_vgpr12_vgpr13 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v11, v16
+; NOOPT-NEXT: v_mov_b32_e32 v12, v15
+; NOOPT-NEXT: v_mov_b32_e32 v13, v14
; NOOPT-NEXT: v_mov_b32_e32 v15, v5
-; NOOPT-NEXT: buffer_store_dwordx4 v[11:14], v[15:16], s[0:3], 0 addr64 offset:32
+; NOOPT-NEXT: v_mov_b32_e32 v14, v4
+; NOOPT-NEXT: buffer_store_dwordx4 v[10:13], v[14:15], s[0:3], 0 addr64 offset:32
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11_vgpr12_vgpr13 killed $exec
+; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec
; NOOPT-NEXT: s_waitcnt expcnt(0)
-; NOOPT-NEXT: v_mov_b32_e32 v11, v4
-; NOOPT-NEXT: v_mov_b32_e32 v12, v3
-; NOOPT-NEXT: v_mov_b32_e32 v13, v2
+; NOOPT-NEXT: v_mov_b32_e32 v10, v3
+; NOOPT-NEXT: v_mov_b32_e32 v11, v2
+; NOOPT-NEXT: v_mov_b32_e32 v12, v1
+; NOOPT-NEXT: v_mov_b32_e32 v1, v4
; NOOPT-NEXT: v_mov_b32_e32 v2, v5
-; NOOPT-NEXT: v_mov_b32_e32 v3, v6
-; NOOPT-NEXT: buffer_store_dwordx4 v[10:13], v[2:3], s[0:3], 0 addr64 offset:16
+; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], v[1:2], s[0:3], 0 addr64 offset:16
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
; NOOPT-NEXT: ; implicit-def: $sgpr4
-; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; NOOPT-NEXT: v_mov_b32_e32 v2, v9
-; NOOPT-NEXT: v_mov_b32_e32 v3, v8
-; NOOPT-NEXT: v_mov_b32_e32 v4, v7
-; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], v[5:6], s[0:3], 0 addr64
-; NOOPT-NEXT: ; kill: killed $vgpr0
+; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; NOOPT-NEXT: v_mov_b32_e32 v1, v8
+; NOOPT-NEXT: v_mov_b32_e32 v2, v7
+; NOOPT-NEXT: v_mov_b32_e32 v3, v6
+; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
; NOOPT-NEXT: s_endpgm
;
; SI-MOVREL-LABEL: insert_or_disj_index:
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
index 9e336a714ca67f..eef51acc4e12e5 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -7,13 +7,13 @@ define fastcc i32 @foo() {
; CHECK-LABEL: name: foo
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: successors: %bb.1(0x80000000)
- ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_WAITCNT 0
; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33
; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32
; CHECK-NEXT: $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17
; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
@@ -26,24 +26,22 @@ define fastcc i32 @foo() {
; CHECK-NEXT: BUFFER_GL1_INV implicit $exec
; CHECK-NEXT: BUFFER_GL0_INV implicit $exec
; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
- ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, killed $vgpr40
- ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, killed $vgpr40
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40
+ ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40
; CHECK-NEXT: S_WAITCNT 49279
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $vcc_lo = S_MOV_B32 $exec_lo
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.1):
; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
- ; CHECK-NEXT: liveins: $vcc_lo, $vgpr40
+ ; CHECK-NEXT: liveins: $vcc_lo
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.DummyReturnBlock:
- ; CHECK-NEXT: liveins: $vgpr40
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr31 = V_READLANE_B32 $vgpr40, 1
; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0
- ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 killed $vgpr40, 2
+ ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr40, 2
; CHECK-NEXT: $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
index f7715364637787..ea18e0d9eeefbd 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll
@@ -1010,73 +1010,73 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34
; GCN-NEXT: s_load_dword s8, s[2:3], 0x44
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
+; GCN-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_bfe_u32 s9, s4, 0xf0001
; GCN-NEXT: s_lshr_b32 s42, s5, 16
-; GCN-NEXT: v_writelane_b32 v0, s0, 0
-; GCN-NEXT: v_writelane_b32 v0, s1, 1
+; GCN-NEXT: v_writelane_b32 v6, s0, 0
+; GCN-NEXT: v_writelane_b32 v6, s1, 1
; GCN-NEXT: s_lshr_b32 s0, s4, 16
-; GCN-NEXT: v_writelane_b32 v0, s0, 2
+; GCN-NEXT: v_writelane_b32 v6, s0, 2
; GCN-NEXT: s_lshr_b32 s0, s4, 17
-; GCN-NEXT: v_writelane_b32 v0, s0, 3
+; GCN-NEXT: v_writelane_b32 v6, s0, 3
; GCN-NEXT: s_lshr_b32 s0, s4, 18
-; GCN-NEXT: v_writelane_b32 v0, s0, 4
+; GCN-NEXT: v_writelane_b32 v6, s0, 4
; GCN-NEXT: s_lshr_b32 s0, s4, 19
-; GCN-NEXT: v_writelane_b32 v0, s0, 5
+; GCN-NEXT: v_writelane_b32 v6, s0, 5
; GCN-NEXT: s_lshr_b32 s0, s4, 20
-; GCN-NEXT: v_writelane_b32 v0, s0, 6
+; GCN-NEXT: v_writelane_b32 v6, s0, 6
; GCN-NEXT: s_lshr_b32 s0, s4, 21
-; GCN-NEXT: v_writelane_b32 v0, s0, 7
+; GCN-NEXT: v_writelane_b32 v6, s0, 7
; GCN-NEXT: s_lshr_b32 s0, s4, 22
-; GCN-NEXT: v_writelane_b32 v0, s0, 8
+; GCN-NEXT: v_writelane_b32 v6, s0, 8
; GCN-NEXT: s_lshr_b32 s0, s4, 23
-; GCN-NEXT: v_writelane_b32 v0, s0, 9
+; GCN-NEXT: v_writelane_b32 v6, s0, 9
; GCN-NEXT: s_lshr_b32 s0, s4, 24
-; GCN-NEXT: v_writelane_b32 v0, s0, 10
+; GCN-NEXT: v_writelane_b32 v6, s0, 10
; GCN-NEXT: s_lshr_b32 s0, s4, 25
-; GCN-NEXT: v_writelane_b32 v0, s0, 11
+; GCN-NEXT: v_writelane_b32 v6, s0, 11
; GCN-NEXT: s_lshr_b32 s0, s4, 26
-; GCN-NEXT: v_writelane_b32 v0, s0, 12
+; GCN-NEXT: v_writelane_b32 v6, s0, 12
; GCN-NEXT: s_lshr_b32 s0, s4, 27
-; GCN-NEXT: v_writelane_b32 v0, s0, 13
+; GCN-NEXT: v_writelane_b32 v6, s0, 13
; GCN-NEXT: s_lshr_b32 s0, s4, 28
-; GCN-NEXT: v_writelane_b32 v0, s0, 14
+; GCN-NEXT: v_writelane_b32 v6, s0, 14
; GCN-NEXT: s_lshr_b32 s0, s4, 29
-; GCN-NEXT: v_writelane_b32 v0, s0, 15
+; GCN-NEXT: v_writelane_b32 v6, s0, 15
; GCN-NEXT: s_lshr_b32 s0, s4, 30
-; GCN-NEXT: v_writelane_b32 v0, s0, 16
+; GCN-NEXT: v_writelane_b32 v6, s0, 16
; GCN-NEXT: s_lshr_b32 s0, s4, 31
-; GCN-NEXT: v_writelane_b32 v0, s0, 17
-; GCN-NEXT: v_writelane_b32 v0, s9, 18
+; GCN-NEXT: v_writelane_b32 v6, s0, 17
+; GCN-NEXT: v_writelane_b32 v6, s9, 18
; GCN-NEXT: s_bfe_u32 s9, s4, 0xe0002
-; GCN-NEXT: v_writelane_b32 v0, s9, 19
+; GCN-NEXT: v_writelane_b32 v6, s9, 19
; GCN-NEXT: s_bfe_u32 s9, s4, 0xd0003
-; GCN-NEXT: v_writelane_b32 v0, s9, 20
+; GCN-NEXT: v_writelane_b32 v6, s9, 20
; GCN-NEXT: s_bfe_u32 s9, s4, 0xc0004
-; GCN-NEXT: v_writelane_b32 v0, s9, 21
+; GCN-NEXT: v_writelane_b32 v6, s9, 21
; GCN-NEXT: s_bfe_u32 s9, s4, 0xb0005
-; GCN-NEXT: v_writelane_b32 v0, s9, 22
+; GCN-NEXT: v_writelane_b32 v6, s9, 22
; GCN-NEXT: s_bfe_u32 s9, s4, 0xa0006
-; GCN-NEXT: v_writelane_b32 v0, s9, 23
+; GCN-NEXT: v_writelane_b32 v6, s9, 23
; GCN-NEXT: s_bfe_u32 s9, s4, 0x90007
-; GCN-NEXT: v_writelane_b32 v0, s9, 24
+; GCN-NEXT: v_writelane_b32 v6, s9, 24
; GCN-NEXT: s_bfe_u32 s9, s4, 0x80008
-; GCN-NEXT: v_writelane_b32 v0, s9, 25
+; GCN-NEXT: v_writelane_b32 v6, s9, 25
; GCN-NEXT: s_bfe_u32 s9, s4, 0x70009
-; GCN-NEXT: v_writelane_b32 v0, s9, 26
+; GCN-NEXT: v_writelane_b32 v6, s9, 26
; GCN-NEXT: s_bfe_u32 s9, s4, 0x6000a
-; GCN-NEXT: v_writelane_b32 v0, s9, 27
+; GCN-NEXT: v_writelane_b32 v6, s9, 27
; GCN-NEXT: s_bfe_u32 s9, s4, 0x5000b
-; GCN-NEXT: v_writelane_b32 v0, s9, 28
+; GCN-NEXT: v_writelane_b32 v6, s9, 28
; GCN-NEXT: s_bfe_u32 s9, s4, 0x4000c
-; GCN-NEXT: v_writelane_b32 v0, s9, 29
+; GCN-NEXT: v_writelane_b32 v6, s9, 29
; GCN-NEXT: s_bfe_u32 s9, s4, 0x3000d
-; GCN-NEXT: v_writelane_b32 v0, s9, 30
+; GCN-NEXT: v_writelane_b32 v6, s9, 30
; GCN-NEXT: s_bfe_u32 s9, s4, 0x2000e
-; GCN-NEXT: v_writelane_b32 v0, s9, 31
+; GCN-NEXT: v_writelane_b32 v6, s9, 31
; GCN-NEXT: s_bfe_u32 s9, s4, 0x1000f
-; GCN-NEXT: v_writelane_b32 v0, s9, 32
+; GCN-NEXT: v_writelane_b32 v6, s9, 32
; GCN-NEXT: s_bfe_u32 s9, s5, 0xf0001
; GCN-NEXT: s_lshr_b32 s43, s5, 17
; GCN-NEXT: s_lshr_b32 s45, s5, 18
@@ -1125,7 +1125,7 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_lshr_b32 s2, s7, 29
; GCN-NEXT: s_lshr_b32 s1, s7, 30
; GCN-NEXT: s_lshr_b32 s0, s7, 31
-; GCN-NEXT: v_writelane_b32 v0, s9, 33
+; GCN-NEXT: v_writelane_b32 v6, s9, 33
; GCN-NEXT: s_bfe_u32 s40, s5, 0xe0002
; GCN-NEXT: s_bfe_u32 s41, s5, 0xd0003
; GCN-NEXT: s_bfe_u32 s44, s5, 0xc0004
@@ -1630,7 +1630,7 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 33
-; GCN-NEXT: v_readlane_b32 s9, v0, 33
+; GCN-NEXT: v_readlane_b32 s9, v6, 33
; GCN-NEXT: s_cselect_b32 s9, s9, 1
; GCN-NEXT: s_lshl_b32 s9, s9, 1
; GCN-NEXT: s_or_b32 s5, s5, s9
@@ -1643,21 +1643,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_and_b32 s1, s1, 0xffff
; GCN-NEXT: s_or_b32 s0, s1, s0
; GCN-NEXT: s_cmp_lg_u32 s8, 31
-; GCN-NEXT: v_readlane_b32 s1, v0, 17
+; GCN-NEXT: v_readlane_b32 s1, v6, 17
; GCN-NEXT: s_cselect_b32 s1, s1, 1
; GCN-NEXT: s_lshl_b32 s1, s1, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 30
-; GCN-NEXT: v_readlane_b32 s2, v0, 16
+; GCN-NEXT: v_readlane_b32 s2, v6, 16
; GCN-NEXT: s_cselect_b32 s2, s2, 1
; GCN-NEXT: s_and_b32 s2, s2, 1
; GCN-NEXT: s_lshl_b32 s2, s2, 2
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_cmp_lg_u32 s8, 29
-; GCN-NEXT: v_readlane_b32 s2, v0, 15
+; GCN-NEXT: v_readlane_b32 s2, v6, 15
; GCN-NEXT: s_cselect_b32 s2, s2, 1
; GCN-NEXT: s_lshl_b32 s2, s2, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 28
-; GCN-NEXT: v_readlane_b32 s3, v0, 14
+; GCN-NEXT: v_readlane_b32 s3, v6, 14
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_and_b32 s3, s3, 1
; GCN-NEXT: s_or_b32 s2, s3, s2
@@ -1665,21 +1665,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s1, s2, s1
; GCN-NEXT: s_lshl_b32 s1, s1, 12
; GCN-NEXT: s_cmp_lg_u32 s8, 27
-; GCN-NEXT: v_readlane_b32 s2, v0, 13
+; GCN-NEXT: v_readlane_b32 s2, v6, 13
; GCN-NEXT: s_cselect_b32 s2, s2, 1
; GCN-NEXT: s_lshl_b32 s2, s2, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 26
-; GCN-NEXT: v_readlane_b32 s3, v0, 12
+; GCN-NEXT: v_readlane_b32 s3, v6, 12
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_and_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 2
; GCN-NEXT: s_or_b32 s2, s2, s3
; GCN-NEXT: s_cmp_lg_u32 s8, 25
-; GCN-NEXT: v_readlane_b32 s3, v0, 11
+; GCN-NEXT: v_readlane_b32 s3, v6, 11
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 24
-; GCN-NEXT: v_readlane_b32 s5, v0, 10
+; GCN-NEXT: v_readlane_b32 s5, v6, 10
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_or_b32 s3, s5, s3
@@ -1689,21 +1689,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_lshl_b32 s2, s2, 8
; GCN-NEXT: s_or_b32 s1, s1, s2
; GCN-NEXT: s_cmp_lg_u32 s8, 23
-; GCN-NEXT: v_readlane_b32 s2, v0, 9
+; GCN-NEXT: v_readlane_b32 s2, v6, 9
; GCN-NEXT: s_cselect_b32 s2, s2, 1
; GCN-NEXT: s_lshl_b32 s2, s2, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 22
-; GCN-NEXT: v_readlane_b32 s3, v0, 8
+; GCN-NEXT: v_readlane_b32 s3, v6, 8
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_and_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 2
; GCN-NEXT: s_or_b32 s2, s2, s3
; GCN-NEXT: s_cmp_lg_u32 s8, 21
-; GCN-NEXT: v_readlane_b32 s3, v0, 7
+; GCN-NEXT: v_readlane_b32 s3, v6, 7
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 20
-; GCN-NEXT: v_readlane_b32 s5, v0, 6
+; GCN-NEXT: v_readlane_b32 s5, v6, 6
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_or_b32 s3, s5, s3
@@ -1711,21 +1711,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s2, s3, s2
; GCN-NEXT: s_lshl_b32 s2, s2, 4
; GCN-NEXT: s_cmp_lg_u32 s8, 19
-; GCN-NEXT: v_readlane_b32 s3, v0, 5
+; GCN-NEXT: v_readlane_b32 s3, v6, 5
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 18
-; GCN-NEXT: v_readlane_b32 s5, v0, 4
+; GCN-NEXT: v_readlane_b32 s5, v6, 4
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 2
; GCN-NEXT: s_or_b32 s3, s3, s5
; GCN-NEXT: s_cmp_lg_u32 s8, 17
-; GCN-NEXT: v_readlane_b32 s5, v0, 3
+; GCN-NEXT: v_readlane_b32 s5, v6, 3
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 16
-; GCN-NEXT: v_readlane_b32 s9, v0, 2
+; GCN-NEXT: v_readlane_b32 s9, v6, 2
; GCN-NEXT: s_cselect_b32 s9, s9, 1
; GCN-NEXT: s_and_b32 s9, s9, 1
; GCN-NEXT: s_or_b32 s5, s9, s5
@@ -1737,21 +1737,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s1, s2, s1
; GCN-NEXT: s_lshl_b32 s1, s1, 16
; GCN-NEXT: s_cmp_lg_u32 s8, 15
-; GCN-NEXT: v_readlane_b32 s2, v0, 32
+; GCN-NEXT: v_readlane_b32 s2, v6, 32
; GCN-NEXT: s_cselect_b32 s2, s2, 1
; GCN-NEXT: s_lshl_b32 s2, s2, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 14
-; GCN-NEXT: v_readlane_b32 s3, v0, 31
+; GCN-NEXT: v_readlane_b32 s3, v6, 31
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_and_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 2
; GCN-NEXT: s_or_b32 s2, s2, s3
; GCN-NEXT: s_cmp_lg_u32 s8, 13
-; GCN-NEXT: v_readlane_b32 s3, v0, 30
+; GCN-NEXT: v_readlane_b32 s3, v6, 30
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 12
-; GCN-NEXT: v_readlane_b32 s5, v0, 29
+; GCN-NEXT: v_readlane_b32 s5, v6, 29
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_or_b32 s3, s5, s3
@@ -1759,21 +1759,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s2, s3, s2
; GCN-NEXT: s_lshl_b32 s2, s2, 12
; GCN-NEXT: s_cmp_lg_u32 s8, 11
-; GCN-NEXT: v_readlane_b32 s3, v0, 28
+; GCN-NEXT: v_readlane_b32 s3, v6, 28
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 10
-; GCN-NEXT: v_readlane_b32 s5, v0, 27
+; GCN-NEXT: v_readlane_b32 s5, v6, 27
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 2
; GCN-NEXT: s_or_b32 s3, s3, s5
; GCN-NEXT: s_cmp_lg_u32 s8, 9
-; GCN-NEXT: v_readlane_b32 s5, v0, 26
+; GCN-NEXT: v_readlane_b32 s5, v6, 26
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 8
-; GCN-NEXT: v_readlane_b32 s9, v0, 25
+; GCN-NEXT: v_readlane_b32 s9, v6, 25
; GCN-NEXT: s_cselect_b32 s9, s9, 1
; GCN-NEXT: s_and_b32 s9, s9, 1
; GCN-NEXT: s_or_b32 s5, s9, s5
@@ -1783,21 +1783,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_lshl_b32 s3, s3, 8
; GCN-NEXT: s_or_b32 s2, s2, s3
; GCN-NEXT: s_cmp_lg_u32 s8, 7
-; GCN-NEXT: v_readlane_b32 s3, v0, 24
+; GCN-NEXT: v_readlane_b32 s3, v6, 24
; GCN-NEXT: s_cselect_b32 s3, s3, 1
; GCN-NEXT: s_lshl_b32 s3, s3, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 6
-; GCN-NEXT: v_readlane_b32 s5, v0, 23
+; GCN-NEXT: v_readlane_b32 s5, v6, 23
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_and_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 2
; GCN-NEXT: s_or_b32 s3, s3, s5
; GCN-NEXT: s_cmp_lg_u32 s8, 5
-; GCN-NEXT: v_readlane_b32 s5, v0, 22
+; GCN-NEXT: v_readlane_b32 s5, v6, 22
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 4
-; GCN-NEXT: v_readlane_b32 s9, v0, 21
+; GCN-NEXT: v_readlane_b32 s9, v6, 21
; GCN-NEXT: s_cselect_b32 s9, s9, 1
; GCN-NEXT: s_and_b32 s9, s9, 1
; GCN-NEXT: s_or_b32 s5, s9, s5
@@ -1805,11 +1805,11 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s3, s5, s3
; GCN-NEXT: s_lshl_b32 s3, s3, 4
; GCN-NEXT: s_cmp_lg_u32 s8, 3
-; GCN-NEXT: v_readlane_b32 s5, v0, 20
+; GCN-NEXT: v_readlane_b32 s5, v6, 20
; GCN-NEXT: s_cselect_b32 s5, s5, 1
; GCN-NEXT: s_lshl_b32 s5, s5, 3
; GCN-NEXT: s_cmp_lg_u32 s8, 2
-; GCN-NEXT: v_readlane_b32 s9, v0, 19
+; GCN-NEXT: v_readlane_b32 s9, v6, 19
; GCN-NEXT: s_cselect_b32 s9, s9, 1
; GCN-NEXT: s_and_b32 s9, s9, 1
; GCN-NEXT: s_lshl_b32 s9, s9, 2
@@ -1818,7 +1818,7 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_cselect_b32 s4, s4, 1
; GCN-NEXT: s_and_b32 s4, s4, 1
; GCN-NEXT: s_cmp_lg_u32 s8, 1
-; GCN-NEXT: v_readlane_b32 s8, v0, 18
+; GCN-NEXT: v_readlane_b32 s8, v6, 18
; GCN-NEXT: s_cselect_b32 s8, s8, 1
; GCN-NEXT: s_lshl_b32 s8, s8, 1
; GCN-NEXT: s_or_b32 s4, s4, s8
@@ -1830,16 +1830,15 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec,
; GCN-NEXT: s_or_b32 s2, s3, s2
; GCN-NEXT: s_and_b32 s2, s2, 0xffff
; GCN-NEXT: s_or_b32 s1, s2, s1
-; GCN-NEXT: v_mov_b32_e32 v1, s1
-; GCN-NEXT: v_mov_b32_e32 v2, s0
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_mov_b32_e32 v6, s1
-; GCN-NEXT: v_mov_b32_e32 v3, s6
-; GCN-NEXT: v_mov_b32_e32 v4, s7
-; GCN-NEXT: v_mov_b32_e32 v5, s0
-; GCN-NEXT: flat_store_dwordx4 v[5:6], v[1:4]
-; GCN-NEXT: ; kill: killed $vgpr0
+; GCN-NEXT: v_mov_b32_e32 v0, s1
+; GCN-NEXT: v_mov_b32_e32 v1, s0
+; GCN-NEXT: v_readlane_b32 s0, v6, 0
+; GCN-NEXT: v_readlane_b32 s1, v6, 1
+; GCN-NEXT: v_mov_b32_e32 v5, s1
+; GCN-NEXT: v_mov_b32_e32 v2, s6
+; GCN-NEXT: v_mov_b32_e32 v3, s7
+; GCN-NEXT: v_mov_b32_e32 v4, s0
+; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GCN-NEXT: s_endpgm
entry:
%v = insertelement <128 x i1> %vec, i1 1, i32 %sel
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
index ec446f1f3bf27d..7b195f8e86220c 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
@@ -13,22 +13,14 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
; CHECK-NEXT: s_add_u32 s0, s0, s15
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_mov_b32_e32 v2, v1
; CHECK-NEXT: v_mov_b32_e32 v1, v0
-; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s8 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_load_dword s8, s[6:7], 0x0
-; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_writelane_b32 v0, s8, 0
-; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s8 ; 4-byte Folded Spill
-; CHECK-NEXT: s_mov_b64 exec, s[34:35]
+; CHECK-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: v_writelane_b32 v40, s8, 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def vgpr10
; CHECK-NEXT: ;;#ASMEND
@@ -62,14 +54,9 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT: s_add_i32 s4, s33, 0x100200
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_add_i32 s4, s33, 0x100100
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT: s_waitcnt vmcnt(1)
-; CHECK-NEXT: v_readlane_b32 s4, v0, 0
+; CHECK-NEXT: v_readlane_b32 s4, v40, 0
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_cmp_eq_u32 s4, s5
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4000
@@ -77,24 +64,14 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], s33 offen ; 4-byte Folded Spill
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %store
-; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT: s_add_i32 s4, s33, 0x100200
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_add_i32 s4, s33, 0x100000
-; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s4 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s4 ; 4-byte Folded Reload
; CHECK-NEXT: ; implicit-def: $sgpr4
-; CHECK-NEXT: v_mov_b32_e32 v1, s4
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: ds_write_b32 v1, v2
-; CHECK-NEXT: ; kill: killed $vgpr0
+; CHECK-NEXT: ds_write_b32 v0, v1
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB0_2: ; %end
-; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
-; CHECK-NEXT: s_add_i32 s4, s33, 0x100200
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[34:35]
-; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
%arr = alloca < 1339 x i32>, align 8192, addrspace(5)
%cmp = icmp ne i32 %val, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index c9e24b721c41e1..b192fdec157396 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -123,6 +123,8 @@
; GCN-O0-NEXT: SI Pre-allocate WWM Registers
; GCN-O0-NEXT: Fast Register Allocator
; GCN-O0-NEXT: SI Lower WWM Copies
+; GCN-O0-NEXT: AMDGPU Reserve WWM Registers
+; GCN-O0-NEXT: Fast Register Allocator
; GCN-O0-NEXT: SI Fix VGPR copies
; GCN-O0-NEXT: Remove Redundant DEBUG_VALUE analysis
; GCN-O0-NEXT: Fixup Statepoint Caller Saved
@@ -370,6 +372,11 @@
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: SI Lower WWM Copies
+; GCN-O1-NEXT: Virtual Register Rewriter
+; GCN-O1-NEXT: AMDGPU Reserve WWM Registers
+; GCN-O1-NEXT: Virtual Register Map
+; GCN-O1-NEXT: Live Register Matrix
+; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: GCN NSA Reassign
; GCN-O1-NEXT: Virtual Register Rewriter
; GCN-O1-NEXT: AMDGPU Mark Last Scratch Load
@@ -673,6 +680,11 @@
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
+; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
+; GCN-O1-OPTS-NEXT: AMDGPU Reserve WWM Registers
+; GCN-O1-OPTS-NEXT: Virtual Register Map
+; GCN-O1-OPTS-NEXT: Live Register Matrix
+; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
; GCN-O1-OPTS-NEXT: AMDGPU Mark Last Scratch Load
@@ -982,6 +994,11 @@
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: SI Lower WWM Copies
+; GCN-O2-NEXT: Virtual Register Rewriter
+; GCN-O2-NEXT: AMDGPU Reserve WWM Registers
+; GCN-O2-NEXT: Virtual Register Map
+; GCN-O2-NEXT: Live Register Matrix
+; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: GCN NSA Reassign
; GCN-O2-NEXT: Virtual Register Rewriter
; GCN-O2-NEXT: AMDGPU Mark Last Scratch Load
@@ -1303,6 +1320,11 @@
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: SI Lower WWM Copies
+; GCN-O3-NEXT: Virtual Register Rewriter
+; GCN-O3-NEXT: AMDGPU Reserve WWM Registers
+; GCN-O3-NEXT: Virtual Register Map
+; GCN-O3-NEXT: Live Register Matrix
+; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: GCN NSA Reassign
; GCN-O3-NEXT: Virtual Register Rewriter
; GCN-O3-NEXT: AMDGPU Mark Last Scratch Load
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index db88ddf1807f38..32abe50ff04d81 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -8759,11 +8759,11 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
; GFX8-NEXT: s_add_u32 s88, s88, s9
; GFX8-NEXT: s_addc_u32 s89, s89, 0
-; GFX8-NEXT: ; implicit-def: $vgpr44 : SGPR spill to VGPR lane
+; GFX8-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s0, s3, 8
-; GFX8-NEXT: v_writelane_b32 v44, s0, 0
-; GFX8-NEXT: v_writelane_b32 v44, s1, 1
+; GFX8-NEXT: v_writelane_b32 v62, s0, 0
+; GFX8-NEXT: v_writelane_b32 v62, s1, 1
; GFX8-NEXT: s_lshr_b32 s0, s2, 1
; GFX8-NEXT: s_lshr_b32 s36, s3, 21
; GFX8-NEXT: s_lshr_b32 s30, s3, 19
@@ -8789,7 +8789,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_lshr_b32 s54, s3, 10
; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000
; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000
-; GFX8-NEXT: v_writelane_b32 v44, s0, 2
+; GFX8-NEXT: v_writelane_b32 v62, s0, 2
; GFX8-NEXT: s_lshr_b32 s52, s3, 11
; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000
; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000
@@ -8814,7 +8814,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_bfe_i64 s[30:31], s[44:45], 0x10000
; GFX8-NEXT: s_bfe_i64 s[36:37], s[38:39], 0x10000
; GFX8-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000
-; GFX8-NEXT: v_writelane_b32 v44, s1, 3
+; GFX8-NEXT: v_writelane_b32 v62, s1, 3
; GFX8-NEXT: s_lshr_b32 s6, s3, 9
; GFX8-NEXT: s_lshr_b32 s8, s3, 6
; GFX8-NEXT: s_lshr_b32 s10, s3, 7
@@ -8830,7 +8830,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v4, s74
; GFX8-NEXT: v_mov_b32_e32 v8, s72
; GFX8-NEXT: v_mov_b32_e32 v0, s70
-; GFX8-NEXT: v_mov_b32_e32 v55, s68
+; GFX8-NEXT: v_mov_b32_e32 v54, s68
; GFX8-NEXT: v_mov_b32_e32 v20, s66
; GFX8-NEXT: v_mov_b32_e32 v16, s64
; GFX8-NEXT: v_mov_b32_e32 v24, s62
@@ -8851,7 +8851,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v2, s46
; GFX8-NEXT: s_lshr_b32 s70, s2, 21
; GFX8-NEXT: s_lshr_b32 s68, s2, 18
-; GFX8-NEXT: v_mov_b32_e32 v57, s42
+; GFX8-NEXT: v_mov_b32_e32 v56, s42
; GFX8-NEXT: s_lshr_b32 s66, s2, 19
; GFX8-NEXT: s_lshr_b32 s64, s2, 16
; GFX8-NEXT: v_mov_b32_e32 v22, s40
@@ -8876,16 +8876,16 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: s_lshr_b32 s36, s2, 2
; GFX8-NEXT: s_lshr_b32 s30, s2, 3
; GFX8-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x10000
-; GFX8-NEXT: v_readlane_b32 s2, v44, 0
-; GFX8-NEXT: v_readlane_b32 s3, v44, 1
+; GFX8-NEXT: v_readlane_b32 s2, v62, 0
+; GFX8-NEXT: v_readlane_b32 s3, v62, 1
; GFX8-NEXT: v_mov_b32_e32 v5, s75
; GFX8-NEXT: v_mov_b32_e32 v7, s51
; GFX8-NEXT: v_mov_b32_e32 v9, s73
; GFX8-NEXT: v_mov_b32_e32 v11, s49
; GFX8-NEXT: v_mov_b32_e32 v1, s71
; GFX8-NEXT: v_mov_b32_e32 v3, s47
-; GFX8-NEXT: v_mov_b32_e32 v56, s69
-; GFX8-NEXT: v_mov_b32_e32 v58, s43
+; GFX8-NEXT: v_mov_b32_e32 v55, s69
+; GFX8-NEXT: v_mov_b32_e32 v57, s43
; GFX8-NEXT: v_mov_b32_e32 v21, s67
; GFX8-NEXT: v_mov_b32_e32 v23, s41
; GFX8-NEXT: v_mov_b32_e32 v17, s65
@@ -8942,24 +8942,24 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v42, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x1e0
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v46, s3
-; GFX8-NEXT: v_mov_b32_e32 v45, s2
+; GFX8-NEXT: v_mov_b32_e32 v45, s3
+; GFX8-NEXT: v_mov_b32_e32 v44, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x1d0
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v48, s3
-; GFX8-NEXT: v_mov_b32_e32 v47, s2
+; GFX8-NEXT: v_mov_b32_e32 v47, s3
+; GFX8-NEXT: v_mov_b32_e32 v46, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x1c0
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v50, s3
-; GFX8-NEXT: v_mov_b32_e32 v49, s2
+; GFX8-NEXT: v_mov_b32_e32 v49, s3
+; GFX8-NEXT: v_mov_b32_e32 v48, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x1b0
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v52, s3
-; GFX8-NEXT: v_mov_b32_e32 v51, s2
+; GFX8-NEXT: v_mov_b32_e32 v51, s3
+; GFX8-NEXT: v_mov_b32_e32 v50, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x1a0
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v54, s3
-; GFX8-NEXT: v_mov_b32_e32 v53, s2
+; GFX8-NEXT: v_mov_b32_e32 v53, s3
+; GFX8-NEXT: v_mov_b32_e32 v52, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x190
; GFX8-NEXT: s_addc_u32 s3, s5, 0
; GFX8-NEXT: v_mov_b32_e32 v15, s3
@@ -8971,26 +8971,26 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: buffer_store_dword v12, off, s[88:91], 0 ; 4-byte Folded Spill
; GFX8-NEXT: buffer_store_dword v13, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill
; GFX8-NEXT: flat_store_dwordx4 v[42:43], v[4:7]
-; GFX8-NEXT: flat_store_dwordx4 v[45:46], v[8:11]
-; GFX8-NEXT: flat_store_dwordx4 v[47:48], v[0:3]
-; GFX8-NEXT: flat_store_dwordx4 v[49:50], v[55:58]
-; GFX8-NEXT: flat_store_dwordx4 v[51:52], v[20:23]
-; GFX8-NEXT: flat_store_dwordx4 v[53:54], v[16:19]
+; GFX8-NEXT: flat_store_dwordx4 v[44:45], v[8:11]
+; GFX8-NEXT: flat_store_dwordx4 v[46:47], v[0:3]
+; GFX8-NEXT: flat_store_dwordx4 v[48:49], v[54:57]
+; GFX8-NEXT: flat_store_dwordx4 v[50:51], v[20:23]
+; GFX8-NEXT: flat_store_dwordx4 v[52:53], v[16:19]
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[24:27]
; GFX8-NEXT: buffer_load_dword v18, off, s[88:91], 0 ; 4-byte Folded Reload
; GFX8-NEXT: buffer_load_dword v19, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload
; GFX8-NEXT: s_add_u32 s2, s4, 0x170
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v60, s3
-; GFX8-NEXT: v_mov_b32_e32 v59, s2
+; GFX8-NEXT: v_mov_b32_e32 v59, s3
+; GFX8-NEXT: v_mov_b32_e32 v58, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x160
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v62, s3
-; GFX8-NEXT: v_mov_b32_e32 v61, s2
+; GFX8-NEXT: v_mov_b32_e32 v61, s3
+; GFX8-NEXT: v_mov_b32_e32 v60, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x150
; GFX8-NEXT: s_addc_u32 s3, s5, 0
-; GFX8-NEXT: v_mov_b32_e32 v46, s3
-; GFX8-NEXT: v_mov_b32_e32 v45, s2
+; GFX8-NEXT: v_mov_b32_e32 v45, s3
+; GFX8-NEXT: v_mov_b32_e32 v44, s2
; GFX8-NEXT: s_add_u32 s2, s4, 0x140
; GFX8-NEXT: s_addc_u32 s3, s5, 0
; GFX8-NEXT: v_mov_b32_e32 v6, s0
@@ -9021,9 +9021,9 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v11, s15
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[28:31]
-; GFX8-NEXT: flat_store_dwordx4 v[59:60], v[32:35]
-; GFX8-NEXT: flat_store_dwordx4 v[61:62], v[36:39]
-; GFX8-NEXT: flat_store_dwordx4 v[45:46], v[40:43]
+; GFX8-NEXT: flat_store_dwordx4 v[58:59], v[32:35]
+; GFX8-NEXT: flat_store_dwordx4 v[60:61], v[36:39]
+; GFX8-NEXT: flat_store_dwordx4 v[44:45], v[40:43]
; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
@@ -9177,9 +9177,9 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v2, s30
; GFX8-NEXT: v_mov_b32_e32 v3, s31
; GFX8-NEXT: v_mov_b32_e32 v4, s0
-; GFX8-NEXT: v_readlane_b32 s0, v44, 2
+; GFX8-NEXT: v_readlane_b32 s0, v62, 2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: v_readlane_b32 s1, v44, 3
+; GFX8-NEXT: v_readlane_b32 s1, v62, 3
; GFX8-NEXT: v_mov_b32_e32 v4, s4
; GFX8-NEXT: v_mov_b32_e32 v0, s18
; GFX8-NEXT: v_mov_b32_e32 v1, s19
@@ -9187,7 +9187,6 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v5, s5
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: ; kill: killed $vgpr44
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_v64i1_to_v64i64:
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
index 9829b7e787d479..e9cd94620a6b9a 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll
@@ -1520,9 +1520,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-NEXT: s_add_i32 s6, s32, 0x202000
; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX7-NEXT: s_add_i32 s6, s32, 0x201200
+; GFX7-NEXT: s_add_i32 s6, s32, 0x202100
; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: v_writelane_b32 v23, s28, 28
@@ -1562,36 +1562,57 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX7-NEXT: ;;#ASMEND
-; GFX7-NEXT: ; implicit-def: $vgpr22
-; GFX7-NEXT: v_writelane_b32 v23, s59, 27
+; GFX7-NEXT: buffer_store_dword v16, off, s[0:3], s32
+; GFX7-NEXT: v_mov_b32_e32 v16, 0x8040
+; GFX7-NEXT: buffer_store_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_store_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Spill
+; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6
; GFX7-NEXT: v_writelane_b32 v22, vcc_lo, 0
; GFX7-NEXT: v_writelane_b32 v22, vcc_hi, 1
-; GFX7-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32
-; GFX7-NEXT: v_mov_b32_e32 v0, 0x8044
-; GFX7-NEXT: buffer_store_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Spill
-; GFX7-NEXT: s_mov_b64 exec, s[28:29]
-; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
-; GFX7-NEXT: v_lshr_b32_e64 v22, s32, 6
; GFX7-NEXT: s_movk_i32 vcc_lo, 0x4040
-; GFX7-NEXT: v_add_i32_e32 v22, vcc, vcc_lo, v22
-; GFX7-NEXT: v_add_i32_e32 v22, vcc, 0x200, v22
-; GFX7-NEXT: v_readfirstlane_b32 s59, v22
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, vcc_lo, v0
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x200, v0
+; GFX7-NEXT: v_writelane_b32 v23, s59, 27
+; GFX7-NEXT: v_readfirstlane_b32 s59, v0
; GFX7-NEXT: s_and_b64 vcc, 0, exec
-; GFX7-NEXT: s_mov_b64 s[28:29], exec
-; GFX7-NEXT: s_mov_b64 exec, -1
-; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32
-; GFX7-NEXT: v_mov_b32_e32 v0, 0x8044
-; GFX7-NEXT: buffer_load_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Reload
-; GFX7-NEXT: s_mov_b64 exec, s[28:29]
-; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32
-; GFX7-NEXT: s_waitcnt vmcnt(1)
; GFX7-NEXT: v_readlane_b32 vcc_lo, v22, 0
; GFX7-NEXT: v_readlane_b32 vcc_hi, v22, 1
-; GFX7-NEXT: s_mov_b64 s[28:29], exec
-; GFX7-NEXT: s_mov_b64 exec, -1
-; GFX7-NEXT: s_mov_b64 exec, s[28:29]
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: buffer_store_dword v16, off, s[0:3], s32
+; GFX7-NEXT: v_mov_b32_e32 v16, 0x8040
+; GFX7-NEXT: buffer_load_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Reload
+; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: ;;#ASMSTART
; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
@@ -1624,13 +1645,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX7-NEXT: v_readlane_b32 s33, v23, 2
; GFX7-NEXT: v_readlane_b32 s31, v23, 1
; GFX7-NEXT: v_readlane_b32 s30, v23, 0
-; GFX7-NEXT: ; kill: killed $vgpr22
; GFX7-NEXT: v_readlane_b32 s28, v23, 28
; GFX7-NEXT: v_readlane_b32 s29, v23, 29
; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX7-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX7-NEXT: s_add_i32 s6, s32, 0x202000
; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX7-NEXT: s_add_i32 s6, s32, 0x201200
+; GFX7-NEXT: s_add_i32 s6, s32, 0x202100
; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX7-NEXT: s_mov_b64 exec, s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
@@ -1640,9 +1660,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: s_add_i32 s6, s32, 0x202000
; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201200
+; GFX8-NEXT: s_add_i32 s6, s32, 0x202100
; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: v_writelane_b32 v23, s58, 28
@@ -1682,36 +1702,60 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc
; GFX8-NEXT: ;;#ASMEND
-; GFX8-NEXT: ; implicit-def: $vgpr22
-; GFX8-NEXT: v_writelane_b32 v23, s59, 27
+; GFX8-NEXT: buffer_store_dword v16, off, s[0:3], s32
+; GFX8-NEXT: v_mov_b32_e32 v16, 0x8040
+; GFX8-NEXT: buffer_store_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Spill
+; GFX8-NEXT: s_nop 0
+; GFX8-NEXT: buffer_store_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_store_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Spill
+; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
+; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; GFX8-NEXT: v_writelane_b32 v22, vcc_lo, 0
; GFX8-NEXT: v_writelane_b32 v22, vcc_hi, 1
-; GFX8-NEXT: s_or_saveexec_b64 s[58:59], -1
-; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
-; GFX8-NEXT: v_mov_b32_e32 v0, 0x8044
-; GFX8-NEXT: buffer_store_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Spill
-; GFX8-NEXT: s_mov_b64 exec, s[58:59]
-; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
-; GFX8-NEXT: v_lshrrev_b32_e64 v22, 6, s32
; GFX8-NEXT: s_movk_i32 vcc_lo, 0x4040
-; GFX8-NEXT: v_add_u32_e32 v22, vcc, vcc_lo, v22
-; GFX8-NEXT: v_add_u32_e32 v22, vcc, 0x200, v22
-; GFX8-NEXT: v_readfirstlane_b32 s59, v22
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, vcc_lo, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x200, v0
+; GFX8-NEXT: v_writelane_b32 v23, s59, 27
+; GFX8-NEXT: v_readfirstlane_b32 s59, v0
; GFX8-NEXT: s_and_b64 vcc, 0, exec
-; GFX8-NEXT: s_mov_b64 s[58:59], exec
-; GFX8-NEXT: s_mov_b64 exec, -1
-; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32
-; GFX8-NEXT: v_mov_b32_e32 v0, 0x8044
-; GFX8-NEXT: buffer_load_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Reload
-; GFX8-NEXT: s_mov_b64 exec, s[58:59]
-; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32
-; GFX8-NEXT: s_waitcnt vmcnt(1)
; GFX8-NEXT: v_readlane_b32 vcc_lo, v22, 0
; GFX8-NEXT: v_readlane_b32 vcc_hi, v22, 1
-; GFX8-NEXT: s_mov_b64 s[58:59], exec
-; GFX8-NEXT: s_mov_b64 exec, -1
-; GFX8-NEXT: s_mov_b64 exec, s[58:59]
+; GFX8-NEXT: v_readlane_b32 s58, v23, 28
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: buffer_store_dword v16, off, s[0:3], s32
+; GFX8-NEXT: v_mov_b32_e32 v16, 0x8040
+; GFX8-NEXT: buffer_load_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Reload
+; GFX8-NEXT: buffer_load_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Reload
+; GFX8-NEXT: s_nop 0
+; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ;;#ASMSTART
; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc
@@ -1744,13 +1788,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i
; GFX8-NEXT: v_readlane_b32 s33, v23, 2
; GFX8-NEXT: v_readlane_b32 s31, v23, 1
; GFX8-NEXT: v_readlane_b32 s30, v23, 0
-; GFX8-NEXT: ; kill: killed $vgpr22
-; GFX8-NEXT: v_readlane_b32 s58, v23, 28
; GFX8-NEXT: v_readlane_b32 s59, v23, 29
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
+; GFX8-NEXT: s_add_i32 s6, s32, 0x202000
; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201200
+; GFX8-NEXT: s_add_i32 s6, s32, 0x202100
; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index c302233e748fda..76a31a7fac8c1a 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -141,112 +141,103 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W64-O0: ; %bb.0:
; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; W64-O0-NEXT: v_mov_b32_e32 v5, v3
-; W64-O0-NEXT: v_mov_b32_e32 v6, v2
-; W64-O0-NEXT: v_mov_b32_e32 v7, v1
-; W64-O0-NEXT: v_mov_b32_e32 v1, v0
-; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT: v_mov_b32_e32 v4, v3
+; W64-O0-NEXT: v_mov_b32_e32 v5, v2
+; W64-O0-NEXT: v_mov_b32_e32 v6, v1
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v7
-; W64-O0-NEXT: v_mov_b32_e32 v3, v6
-; W64-O0-NEXT: v_mov_b32_e32 v4, v5
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v6
+; W64-O0-NEXT: v_mov_b32_e32 v2, v5
+; W64-O0-NEXT: v_mov_b32_e32 v3, v4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; W64-O0-NEXT: s_mov_b32 s4, 0
-; W64-O0-NEXT: s_waitcnt vmcnt(4)
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
+; W64-O0-NEXT: v_writelane_b32 v7, s4, 0
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT: v_writelane_b32 v7, s4, 1
+; W64-O0-NEXT: v_writelane_b32 v7, s5, 2
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v7, s8, 3
+; W64-O0-NEXT: v_writelane_b32 v7, s9, 4
+; W64-O0-NEXT: v_writelane_b32 v7, s10, 5
+; W64-O0-NEXT: v_writelane_b32 v7, s11, 6
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT: v_writelane_b32 v7, s4, 7
+; W64-O0-NEXT: v_writelane_b32 v7, s5, 8
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v7, 7
+; W64-O0-NEXT: v_readlane_b32 s5, v7, 8
+; W64-O0-NEXT: v_readlane_b32 s8, v7, 3
+; W64-O0-NEXT: v_readlane_b32 s9, v7, 4
+; W64-O0-NEXT: v_readlane_b32 s10, v7, 5
+; W64-O0-NEXT: v_readlane_b32 s11, v7, 6
+; W64-O0-NEXT: v_readlane_b32 s6, v7, 0
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB0_1
; W64-O0-NEXT: ; %bb.3:
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 1
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 2
+; W64-O0-NEXT: v_readlane_b32 s4, v7, 1
+; W64-O0-NEXT: v_readlane_b32 s5, v7, 2
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT: ; kill: killed $vgpr1
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
@@ -498,34 +489,32 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
; W64-O0: ; %bb.0: ; %entry
; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v13, v4
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v7, v3
-; W64-O0-NEXT: v_mov_b32_e32 v8, v2
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v9, v1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v3, v0
-; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_mov_b32_e32 v6, v3
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_mov_b32_e32 v7, v2
+; W64-O0-NEXT: v_mov_b32_e32 v8, v1
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_mov_b32_e32 v2, v0
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
; W64-O0-NEXT: v_mov_b32_e32 v14, v5
-; W64-O0-NEXT: v_mov_b32_e32 v15, v6
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_mov_b32_e32 v16, v4
+; W64-O0-NEXT: v_mov_b32_e32 v15, v4
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_mov_b32_e32 v16, v3
; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
; W64-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
@@ -535,195 +524,192 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4_vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v4, v9
-; W64-O0-NEXT: v_mov_b32_e32 v5, v8
-; W64-O0-NEXT: v_mov_b32_e32 v6, v7
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v3, v8
+; W64-O0-NEXT: v_mov_b32_e32 v4, v7
+; W64-O0-NEXT: v_mov_b32_e32 v5, v6
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v3, v12
-; W64-O0-NEXT: s_waitcnt vmcnt(10)
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v2, v12
+; W64-O0-NEXT: s_waitcnt vmcnt(9)
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v10
-; W64-O0-NEXT: s_waitcnt vmcnt(11)
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v10
+; W64-O0-NEXT: s_waitcnt vmcnt(10)
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; W64-O0-NEXT: s_mov_b32 s4, 0
-; W64-O0-NEXT: s_waitcnt vmcnt(12)
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT: ; implicit-def: $vgpr17 : SGPR spill to VGPR lane
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 0
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 1
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 2
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v17, s8, 3
+; W64-O0-NEXT: v_writelane_b32 v17, s9, 4
+; W64-O0-NEXT: v_writelane_b32 v17, s10, 5
+; W64-O0-NEXT: v_writelane_b32 v17, s11, 6
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 7
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 8
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 7
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 8
+; W64-O0-NEXT: v_readlane_b32 s8, v17, 3
+; W64-O0-NEXT: v_readlane_b32 s9, v17, 4
+; W64-O0-NEXT: v_readlane_b32 s10, v17, 5
+; W64-O0-NEXT: v_readlane_b32 s11, v17, 6
+; W64-O0-NEXT: v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB1_1
; W64-O0-NEXT: ; %bb.3:
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT: v_readlane_b32 s5, v0, 2
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 1
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 2
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 9
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 10
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 9
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 10
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 11
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 12
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 13
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 14
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v17, s8, 11
+; W64-O0-NEXT: v_writelane_b32 v17, s9, 12
+; W64-O0-NEXT: v_writelane_b32 v17, s10, 13
+; W64-O0-NEXT: v_writelane_b32 v17, s11, 14
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 15
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 16
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 15
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 16
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 15
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 16
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 11
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 12
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 13
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 14
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 15
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 16
+; W64-O0-NEXT: v_readlane_b32 s8, v17, 11
+; W64-O0-NEXT: v_readlane_b32 s9, v17, 12
+; W64-O0-NEXT: v_readlane_b32 s10, v17, 13
+; W64-O0-NEXT: v_readlane_b32 s11, v17, 14
+; W64-O0-NEXT: v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB1_4
; W64-O0-NEXT: ; %bb.6:
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 9
-; W64-O0-NEXT: v_readlane_b32 s5, v0, 10
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 9
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 10
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: global_store_dword v[4:5], v6, off
+; W64-O0-NEXT: global_store_dword v[3:4], v5, off
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: global_store_dword v[1:2], v3, off
+; W64-O0-NEXT: global_store_dword v[0:1], v2, off
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: ; kill: killed $vgpr0
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
@@ -1031,262 +1017,253 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad
; W64-O0: ; %bb.0: ; %entry
; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT: v_mov_b32_e32 v8, v6
-; W64-O0-NEXT: v_mov_b32_e32 v9, v5
-; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_mov_b32_e32 v8, v5
+; W64-O0-NEXT: v_mov_b32_e32 v5, v4
+; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
-; W64-O0-NEXT: v_mov_b32_e32 v10, v3
-; W64-O0-NEXT: v_mov_b32_e32 v11, v2
-; W64-O0-NEXT: v_mov_b32_e32 v13, v1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v6, v0
-; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_mov_b32_e32 v9, v3
+; W64-O0-NEXT: v_mov_b32_e32 v10, v2
+; W64-O0-NEXT: v_mov_b32_e32 v11, v1
+; W64-O0-NEXT: v_mov_b32_e32 v5, v0
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v9
-; W64-O0-NEXT: v_mov_b32_e32 v3, v8
-; W64-O0-NEXT: v_mov_b32_e32 v4, v7
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v8
+; W64-O0-NEXT: v_mov_b32_e32 v2, v6
+; W64-O0-NEXT: v_mov_b32_e32 v3, v7
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7_vgpr8_vgpr9 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v7, v13
-; W64-O0-NEXT: v_mov_b32_e32 v8, v11
-; W64-O0-NEXT: v_mov_b32_e32 v9, v10
-; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v6, v11
+; W64-O0-NEXT: v_mov_b32_e32 v7, v10
+; W64-O0-NEXT: v_mov_b32_e32 v8, v9
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v6, v12
-; W64-O0-NEXT: s_waitcnt vmcnt(7)
-; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v5, v12
+; W64-O0-NEXT: s_waitcnt vmcnt(6)
+; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT: s_waitcnt vmcnt(7)
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT: s_waitcnt vmcnt(6)
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; W64-O0-NEXT: ;;#ASMSTART
; W64-O0-NEXT: s_mov_b32 s4, 17
; W64-O0-NEXT: ;;#ASMEND
; W64-O0-NEXT: s_mov_b32 s5, s4
-; W64-O0-NEXT: s_waitcnt vmcnt(10)
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 0
+; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 0
; W64-O0-NEXT: s_mov_b32 s5, 0
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 1
-; W64-O0-NEXT: v_mov_b32_e32 v1, s4
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 1
+; W64-O0-NEXT: v_mov_b32_e32 v0, s4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 2
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 3
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 2
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 3
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 4
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 5
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 6
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 7
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v13, s8, 4
+; W64-O0-NEXT: v_writelane_b32 v13, s9, 5
+; W64-O0-NEXT: v_writelane_b32 v13, s10, 6
+; W64-O0-NEXT: v_writelane_b32 v13, s11, 7
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 8
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 9
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 8
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 9
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 8
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 9
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 4
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 5
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 6
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 7
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 8
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 9
+; W64-O0-NEXT: v_readlane_b32 s8, v13, 4
+; W64-O0-NEXT: v_readlane_b32 s9, v13, 5
+; W64-O0-NEXT: v_readlane_b32 s10, v13, 6
+; W64-O0-NEXT: v_readlane_b32 s11, v13, 7
+; W64-O0-NEXT: v_readlane_b32 s6, v13, 1
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB2_1
; W64-O0-NEXT: ; %bb.3:
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s6, v0, 2
-; W64-O0-NEXT: v_readlane_b32 s7, v0, 3
+; W64-O0-NEXT: v_readlane_b32 s6, v13, 2
+; W64-O0-NEXT: v_readlane_b32 s7, v13, 3
; W64-O0-NEXT: s_mov_b64 exec, s[6:7]
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 1
; W64-O0-NEXT: s_mov_b32 s5, 0x3ff
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_and_b32_e64 v2, v2, s5
-; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_and_b32_e64 v1, v1, s5
+; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 10
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 11
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 10
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 11
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execz .LBB2_8
; W64-O0-NEXT: ; %bb.4: ; %bb1
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 0
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 0
; W64-O0-NEXT: s_mov_b32 s5, 0
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 12
-; W64-O0-NEXT: v_mov_b32_e32 v1, s4
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 12
+; W64-O0-NEXT: v_mov_b32_e32 v0, s4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 13
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 14
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 13
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 14
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 15
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 16
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 17
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 18
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v13, s8, 15
+; W64-O0-NEXT: v_writelane_b32 v13, s9, 16
+; W64-O0-NEXT: v_writelane_b32 v13, s10, 17
+; W64-O0-NEXT: v_writelane_b32 v13, s11, 18
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 19
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 20
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 19
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 20
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 19
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 20
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 15
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 16
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 17
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 18
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 12
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 19
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 20
+; W64-O0-NEXT: v_readlane_b32 s8, v13, 15
+; W64-O0-NEXT: v_readlane_b32 s9, v13, 16
+; W64-O0-NEXT: v_readlane_b32 s10, v13, 17
+; W64-O0-NEXT: v_readlane_b32 s11, v13, 18
+; W64-O0-NEXT: v_readlane_b32 s6, v13, 12
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB2_5
; W64-O0-NEXT: ; %bb.7:
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 13
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 14
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 13
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 14
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; W64-O0-NEXT: .LBB2_8: ; %bb2
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_nop 0
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 10
-; W64-O0-NEXT: v_readlane_b32 s5, v0, 11
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 10
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 11
; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: global_store_dword v[1:2], v3, off
+; W64-O0-NEXT: global_store_dword v[0:1], v2, off
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: ; kill: killed $vgpr0
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index dd6fd5aa384f6c..59ceecbf43b785 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -140,127 +140,115 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 {
; W64-O0: ; %bb.0:
; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; W64-O0-NEXT: v_mov_b32_e32 v6, v2
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v3, v1
-; W64-O0-NEXT: v_mov_b32_e32 v1, v0
-; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_mov_b32_e32 v5, v2
+; W64-O0-NEXT: v_mov_b32_e32 v2, v1
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_mov_b32_e32 v7, v2
-; W64-O0-NEXT: v_mov_b32_e32 v5, v7
-; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v6, v3
+; W64-O0-NEXT: v_mov_b32_e32 v4, v6
+; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v3
-; W64-O0-NEXT: v_mov_b32_e32 v7, v2
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v2
+; W64-O0-NEXT: v_mov_b32_e32 v6, v1
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v7
-; W64-O0-NEXT: v_mov_b32_e32 v3, v6
-; W64-O0-NEXT: v_mov_b32_e32 v4, v5
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v6
+; W64-O0-NEXT: v_mov_b32_e32 v2, v5
+; W64-O0-NEXT: v_mov_b32_e32 v3, v4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: s_mov_b32 s4, 0
-; W64-O0-NEXT: s_waitcnt vmcnt(4)
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
+; W64-O0-NEXT: v_writelane_b32 v7, s4, 0
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT: v_writelane_b32 v7, s4, 1
+; W64-O0-NEXT: v_writelane_b32 v7, s5, 2
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v7, s8, 3
+; W64-O0-NEXT: v_writelane_b32 v7, s9, 4
+; W64-O0-NEXT: v_writelane_b32 v7, s10, 5
+; W64-O0-NEXT: v_writelane_b32 v7, s11, 6
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT: v_writelane_b32 v7, s4, 7
+; W64-O0-NEXT: v_writelane_b32 v7, s5, 8
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v7, 7
+; W64-O0-NEXT: v_readlane_b32 s5, v7, 8
+; W64-O0-NEXT: v_readlane_b32 s8, v7, 3
+; W64-O0-NEXT: v_readlane_b32 s9, v7, 4
+; W64-O0-NEXT: v_readlane_b32 s10, v7, 5
+; W64-O0-NEXT: v_readlane_b32 s11, v7, 6
+; W64-O0-NEXT: v_readlane_b32 s6, v7, 0
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB0_1
; W64-O0-NEXT: ; %bb.3:
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 1
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 2
+; W64-O0-NEXT: v_readlane_b32 s4, v7, 1
+; W64-O0-NEXT: v_readlane_b32 s5, v7, 2
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT: ; kill: killed $vgpr1
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
@@ -512,45 +500,42 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
; W64-O0: ; %bb.0: ; %entry
; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; W64-O0-NEXT: v_mov_b32_e32 v14, v6
-; W64-O0-NEXT: v_mov_b32_e32 v9, v5
+; W64-O0-NEXT: v_mov_b32_e32 v8, v5
+; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; W64-O0-NEXT: v_mov_b32_e32 v13, v4
-; W64-O0-NEXT: v_mov_b32_e32 v4, v3
-; W64-O0-NEXT: v_mov_b32_e32 v8, v2
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v5, v1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v3, v0
-; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT: v_mov_b32_e32 v7, v2
+; W64-O0-NEXT: v_mov_b32_e32 v4, v1
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_mov_b32_e32 v2, v0
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v15, v7
-; W64-O0-NEXT: v_mov_b32_e32 v6, v15
-; W64-O0-NEXT: v_mov_b32_e32 v7, v14
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_mov_b32_e32 v15, v5
+; W64-O0-NEXT: v_mov_b32_e32 v5, v15
+; W64-O0-NEXT: v_mov_b32_e32 v6, v14
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v14, v9
-; W64-O0-NEXT: v_mov_b32_e32 v9, v14
+; W64-O0-NEXT: v_mov_b32_e32 v14, v8
+; W64-O0-NEXT: v_mov_b32_e32 v8, v14
; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 killed $vgpr13_vgpr14 killed $exec
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v14, v9
-; W64-O0-NEXT: v_mov_b32_e32 v15, v7
-; W64-O0-NEXT: v_mov_b32_e32 v16, v6
+; W64-O0-NEXT: v_mov_b32_e32 v14, v8
+; W64-O0-NEXT: v_mov_b32_e32 v15, v6
+; W64-O0-NEXT: v_mov_b32_e32 v16, v5
; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
; W64-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
@@ -558,45 +543,45 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
; W64-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v9, v4
-; W64-O0-NEXT: v_mov_b32_e32 v7, v9
-; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
+; W64-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v8, v3
+; W64-O0-NEXT: v_mov_b32_e32 v6, v8
+; W64-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v4, v5
-; W64-O0-NEXT: v_mov_b32_e32 v9, v4
-; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
+; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v3, v4
+; W64-O0-NEXT: v_mov_b32_e32 v8, v3
+; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4_vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v4, v9
-; W64-O0-NEXT: v_mov_b32_e32 v5, v8
-; W64-O0-NEXT: v_mov_b32_e32 v6, v7
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v3, v8
+; W64-O0-NEXT: v_mov_b32_e32 v4, v7
+; W64-O0-NEXT: v_mov_b32_e32 v5, v6
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v3, v12
-; W64-O0-NEXT: s_waitcnt vmcnt(10)
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v2, v12
+; W64-O0-NEXT: s_waitcnt vmcnt(9)
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v10
-; W64-O0-NEXT: s_waitcnt vmcnt(11)
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v10
+; W64-O0-NEXT: s_waitcnt vmcnt(10)
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
@@ -604,165 +589,162 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8)
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: s_mov_b32 s4, 0
-; W64-O0-NEXT: s_waitcnt vmcnt(12)
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 0
+; W64-O0-NEXT: ; implicit-def: $vgpr17 : SGPR spill to VGPR lane
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 0
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 1
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 2
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 1
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 2
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 3
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 4
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 5
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 6
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v17, s8, 3
+; W64-O0-NEXT: v_writelane_b32 v17, s9, 4
+; W64-O0-NEXT: v_writelane_b32 v17, s10, 5
+; W64-O0-NEXT: v_writelane_b32 v17, s11, 6
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 7
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 8
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 7
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 8
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 7
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 8
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 3
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 4
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 5
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 6
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 7
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 8
+; W64-O0-NEXT: v_readlane_b32 s8, v17, 3
+; W64-O0-NEXT: v_readlane_b32 s9, v17, 4
+; W64-O0-NEXT: v_readlane_b32 s10, v17, 5
+; W64-O0-NEXT: v_readlane_b32 s11, v17, 6
+; W64-O0-NEXT: v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB1_1
; W64-O0-NEXT: ; %bb.3:
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT: v_readlane_b32 s5, v0, 2
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 1
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 2
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 9
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 10
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 9
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 10
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 11
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 12
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 13
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 14
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v17, s8, 11
+; W64-O0-NEXT: v_writelane_b32 v17, s9, 12
+; W64-O0-NEXT: v_writelane_b32 v17, s10, 13
+; W64-O0-NEXT: v_writelane_b32 v17, s11, 14
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 15
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 16
+; W64-O0-NEXT: v_writelane_b32 v17, s4, 15
+; W64-O0-NEXT: v_writelane_b32 v17, s5, 16
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 15
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 16
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 11
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 12
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 13
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 14
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 0
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 15
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 16
+; W64-O0-NEXT: v_readlane_b32 s8, v17, 11
+; W64-O0-NEXT: v_readlane_b32 s9, v17, 12
+; W64-O0-NEXT: v_readlane_b32 s10, v17, 13
+; W64-O0-NEXT: v_readlane_b32 s11, v17, 14
+; W64-O0-NEXT: v_readlane_b32 s6, v17, 0
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB1_4
; W64-O0-NEXT: ; %bb.6:
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 9
-; W64-O0-NEXT: v_readlane_b32 s5, v0, 10
+; W64-O0-NEXT: v_readlane_b32 s4, v17, 9
+; W64-O0-NEXT: v_readlane_b32 s5, v17, 10
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT: global_store_dword v[3:4], v5, off
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: global_store_dword v[4:5], v6, off
+; W64-O0-NEXT: global_store_dword v[0:1], v2, off
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: global_store_dword v[1:2], v3, off
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: ; kill: killed $vgpr0
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
@@ -1070,48 +1052,42 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
; W64-O0: ; %bb.0: ; %entry
; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
-; W64-O0-NEXT: v_mov_b32_e32 v6, v5
-; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_mov_b32_e32 v5, v4
+; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; W64-O0-NEXT: v_mov_b32_e32 v4, v3
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v13, v2
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; W64-O0-NEXT: v_mov_b32_e32 v10, v1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_mov_b32_e32 v9, v2
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_mov_b32_e32 v6, v1
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; W64-O0-NEXT: v_mov_b32_e32 v8, v0
-; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v14, v4
-; W64-O0-NEXT: v_mov_b32_e32 v4, v14
-; W64-O0-NEXT: v_mov_b32_e32 v6, v13
+; W64-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v10, v3
+; W64-O0-NEXT: v_mov_b32_e32 v3, v10
+; W64-O0-NEXT: v_mov_b32_e32 v5, v9
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v9, v10
-; W64-O0-NEXT: v_mov_b32_e32 v13, v9
+; W64-O0-NEXT: v_mov_b32_e32 v9, v6
+; W64-O0-NEXT: v_mov_b32_e32 v6, v9
; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v9, v13
-; W64-O0-NEXT: v_mov_b32_e32 v10, v6
-; W64-O0-NEXT: v_mov_b32_e32 v11, v4
+; W64-O0-NEXT: v_mov_b32_e32 v9, v6
+; W64-O0-NEXT: v_mov_b32_e32 v10, v5
+; W64-O0-NEXT: v_mov_b32_e32 v11, v3
; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
@@ -1119,251 +1095,246 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j,
; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v6, v7
+; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v5, v7
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT: s_waitcnt vmcnt(6)
-; W64-O0-NEXT: v_mov_b32_e32 v4, v2
+; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT: s_waitcnt vmcnt(5)
+; W64-O0-NEXT: v_mov_b32_e32 v3, v1
; W64-O0-NEXT: ; implicit-def: $sgpr4
; W64-O0-NEXT: ; implicit-def: $sgpr4
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v12
-; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v12
+; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
-; W64-O0-NEXT: s_waitcnt vmcnt(9)
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; W64-O0-NEXT: s_waitcnt vmcnt(8)
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; W64-O0-NEXT: ;;#ASMSTART
; W64-O0-NEXT: s_mov_b32 s4, 17
; W64-O0-NEXT: ;;#ASMEND
; W64-O0-NEXT: s_mov_b32 s5, s4
-; W64-O0-NEXT: s_waitcnt vmcnt(10)
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 0
+; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 0
; W64-O0-NEXT: s_mov_b32 s5, 0
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 1
-; W64-O0-NEXT: v_mov_b32_e32 v1, s4
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 1
+; W64-O0-NEXT: v_mov_b32_e32 v0, s4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 2
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 3
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 2
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 3
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 4
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 5
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 6
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 7
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v13, s8, 4
+; W64-O0-NEXT: v_writelane_b32 v13, s9, 5
+; W64-O0-NEXT: v_writelane_b32 v13, s10, 6
+; W64-O0-NEXT: v_writelane_b32 v13, s11, 7
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 8
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 9
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 8
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 9
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 8
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 9
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 4
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 5
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 6
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 7
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 8
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 9
+; W64-O0-NEXT: v_readlane_b32 s8, v13, 4
+; W64-O0-NEXT: v_readlane_b32 s9, v13, 5
+; W64-O0-NEXT: v_readlane_b32 s10, v13, 6
+; W64-O0-NEXT: v_readlane_b32 s11, v13, 7
+; W64-O0-NEXT: v_readlane_b32 s6, v13, 1
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB2_1
; W64-O0-NEXT: ; %bb.3:
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s6, v0, 2
-; W64-O0-NEXT: v_readlane_b32 s7, v0, 3
+; W64-O0-NEXT: v_readlane_b32 s6, v13, 2
+; W64-O0-NEXT: v_readlane_b32 s7, v13, 3
; W64-O0-NEXT: s_mov_b64 exec, s[6:7]
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 1
; W64-O0-NEXT: s_mov_b32 s5, 0x3ff
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_and_b32_e64 v2, v2, s5
-; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_and_b32_e64 v1, v1, s5
+; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 10
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 11
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 10
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 11
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execz .LBB2_8
; W64-O0-NEXT: ; %bb.4: ; %bb1
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 0
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_mov_b32_e32 v7, v5
-; W64-O0-NEXT: v_mov_b32_e32 v1, v4
-; W64-O0-NEXT: v_mov_b32_e32 v5, v3
-; W64-O0-NEXT: v_mov_b32_e32 v6, v2
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 0
+; W64-O0-NEXT: v_mov_b32_e32 v6, v4
+; W64-O0-NEXT: v_mov_b32_e32 v0, v3
+; W64-O0-NEXT: v_mov_b32_e32 v4, v2
+; W64-O0-NEXT: v_mov_b32_e32 v5, v1
; W64-O0-NEXT: ; implicit-def: $sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr5
; W64-O0-NEXT: ; implicit-def: $sgpr5
-; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; W64-O0-NEXT: v_mov_b32_e32 v2, v7
-; W64-O0-NEXT: v_mov_b32_e32 v3, v6
-; W64-O0-NEXT: v_mov_b32_e32 v4, v5
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
+; W64-O0-NEXT: v_mov_b32_e32 v1, v6
+; W64-O0-NEXT: v_mov_b32_e32 v2, v5
+; W64-O0-NEXT: v_mov_b32_e32 v3, v4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
; W64-O0-NEXT: s_nop 0
-; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b32 s5, 0
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 12
-; W64-O0-NEXT: v_mov_b32_e32 v1, s4
-; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 12
+; W64-O0-NEXT: v_mov_b32_e32 v0, s4
+; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 s[4:5], exec
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 13
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 14
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 13
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 14
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_waitcnt vmcnt(4)
+; W64-O0-NEXT: v_readfirstlane_b32 s8, v0
; W64-O0-NEXT: s_waitcnt vmcnt(3)
-; W64-O0-NEXT: v_readfirstlane_b32 s8, v1
-; W64-O0-NEXT: s_waitcnt vmcnt(2)
-; W64-O0-NEXT: v_readfirstlane_b32 s12, v2
+; W64-O0-NEXT: v_readfirstlane_b32 s12, v1
; W64-O0-NEXT: s_mov_b32 s4, s8
; W64-O0-NEXT: s_mov_b32 s5, s12
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1]
+; W64-O0-NEXT: s_waitcnt vmcnt(2)
+; W64-O0-NEXT: v_readfirstlane_b32 s7, v2
; W64-O0-NEXT: s_waitcnt vmcnt(1)
-; W64-O0-NEXT: v_readfirstlane_b32 s7, v3
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readfirstlane_b32 s6, v4
+; W64-O0-NEXT: v_readfirstlane_b32 s6, v3
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4]
+; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3]
; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11]
; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
; W64-O0-NEXT: s_mov_b32 s9, s12
; W64-O0-NEXT: s_mov_b32 s10, s7
; W64-O0-NEXT: s_mov_b32 s11, s6
-; W64-O0-NEXT: v_writelane_b32 v0, s8, 15
-; W64-O0-NEXT: v_writelane_b32 v0, s9, 16
-; W64-O0-NEXT: v_writelane_b32 v0, s10, 17
-; W64-O0-NEXT: v_writelane_b32 v0, s11, 18
+; W64-O0-NEXT: s_waitcnt vmcnt(0)
+; W64-O0-NEXT: v_writelane_b32 v13, s8, 15
+; W64-O0-NEXT: v_writelane_b32 v13, s9, 16
+; W64-O0-NEXT: v_writelane_b32 v13, s10, 17
+; W64-O0-NEXT: v_writelane_b32 v13, s11, 18
; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
-; W64-O0-NEXT: v_writelane_b32 v0, s4, 19
-; W64-O0-NEXT: v_writelane_b32 v0, s5, 20
+; W64-O0-NEXT: v_writelane_b32 v13, s4, 19
+; W64-O0-NEXT: v_writelane_b32 v13, s5, 20
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 19
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 20
-; W64-O0-NEXT: v_readlane_b32 s8, v1, 15
-; W64-O0-NEXT: v_readlane_b32 s9, v1, 16
-; W64-O0-NEXT: v_readlane_b32 s10, v1, 17
-; W64-O0-NEXT: v_readlane_b32 s11, v1, 18
-; W64-O0-NEXT: v_readlane_b32 s6, v1, 12
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: s_nop 2
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 19
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 20
+; W64-O0-NEXT: v_readlane_b32 s8, v13, 15
+; W64-O0-NEXT: v_readlane_b32 s9, v13, 16
+; W64-O0-NEXT: v_readlane_b32 s10, v13, 17
+; W64-O0-NEXT: v_readlane_b32 s11, v13, 18
+; W64-O0-NEXT: v_readlane_b32 s6, v13, 12
+; W64-O0-NEXT: s_nop 4
; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5]
; W64-O0-NEXT: s_cbranch_execnz .LBB2_5
; W64-O0-NEXT: ; %bb.7:
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v1, 13
-; W64-O0-NEXT: v_readlane_b32 s5, v1, 14
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 13
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 14
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; W64-O0-NEXT: .LBB2_8: ; %bb2
+; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; W64-O0-NEXT: s_nop 0
+; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[16:17]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: v_readlane_b32 s4, v0, 10
-; W64-O0-NEXT: v_readlane_b32 s5, v0, 11
+; W64-O0-NEXT: v_readlane_b32 s4, v13, 10
+; W64-O0-NEXT: v_readlane_b32 s5, v13, 11
; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: global_store_dword v[1:2], v3, off
+; W64-O0-NEXT: global_store_dword v[0:1], v2, off
; W64-O0-NEXT: s_waitcnt vmcnt(0)
-; W64-O0-NEXT: ; kill: killed $vgpr0
; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; W64-O0-NEXT: s_mov_b64 exec, s[4:5]
; W64-O0-NEXT: s_waitcnt vmcnt(0)
; W64-O0-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 72aafcaca3ff81..37d0309caac0ad 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -11,21 +11,17 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %26
- ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %23
- ; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]]
- ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %6
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %7
+ ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; REGALLOC-GFX908-NEXT: [[COPY2:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+ ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
- ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
- ; REGALLOC-GFX908-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, [[SI_SPILL_V64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
- ; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
- ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
+ ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
+ ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
+ ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX908-NEXT: S_ENDPGM 0
;
; PEI-GFX908-LABEL: name: partial_copy
@@ -60,18 +56,15 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %25
- ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %23
- ; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %6
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %7
+ ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+ ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
- ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
- ; REGALLOC-GFX90A-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
+ ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
; REGALLOC-GFX90A-NEXT: S_ENDPGM 0
;
diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
index 5b0354e63c2365..078b133a93d6f3 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll
@@ -17,13 +17,11 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: s_mov_b32 s95, 0xe8f000
; GCN-NEXT: s_add_u32 s92, s92, s9
; GCN-NEXT: s_addc_u32 s93, s93, 0
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[2:3], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; GCN-NEXT: v_writelane_b32 v2, s4, 0
; GCN-NEXT: v_writelane_b32 v2, s5, 1
; GCN-NEXT: v_writelane_b32 v2, s6, 2
@@ -115,107 +113,109 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 0
-; GCN-NEXT: v_writelane_b32 v1, s5, 1
-; GCN-NEXT: v_writelane_b32 v1, s6, 2
-; GCN-NEXT: v_writelane_b32 v1, s7, 3
-; GCN-NEXT: v_writelane_b32 v1, s8, 4
-; GCN-NEXT: v_writelane_b32 v1, s9, 5
-; GCN-NEXT: v_writelane_b32 v1, s10, 6
-; GCN-NEXT: v_writelane_b32 v1, s11, 7
+; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v2, s4, 0
+; GCN-NEXT: v_writelane_b32 v2, s5, 1
+; GCN-NEXT: v_writelane_b32 v2, s6, 2
+; GCN-NEXT: v_writelane_b32 v2, s7, 3
+; GCN-NEXT: v_writelane_b32 v2, s8, 4
+; GCN-NEXT: v_writelane_b32 v2, s9, 5
+; GCN-NEXT: v_writelane_b32 v2, s10, 6
+; GCN-NEXT: v_writelane_b32 v2, s11, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 8
-; GCN-NEXT: v_writelane_b32 v1, s5, 9
-; GCN-NEXT: v_writelane_b32 v1, s6, 10
-; GCN-NEXT: v_writelane_b32 v1, s7, 11
-; GCN-NEXT: v_writelane_b32 v1, s8, 12
-; GCN-NEXT: v_writelane_b32 v1, s9, 13
-; GCN-NEXT: v_writelane_b32 v1, s10, 14
-; GCN-NEXT: v_writelane_b32 v1, s11, 15
+; GCN-NEXT: v_writelane_b32 v2, s4, 8
+; GCN-NEXT: v_writelane_b32 v2, s5, 9
+; GCN-NEXT: v_writelane_b32 v2, s6, 10
+; GCN-NEXT: v_writelane_b32 v2, s7, 11
+; GCN-NEXT: v_writelane_b32 v2, s8, 12
+; GCN-NEXT: v_writelane_b32 v2, s9, 13
+; GCN-NEXT: v_writelane_b32 v2, s10, 14
+; GCN-NEXT: v_writelane_b32 v2, s11, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 16
-; GCN-NEXT: v_writelane_b32 v1, s5, 17
-; GCN-NEXT: v_writelane_b32 v1, s6, 18
-; GCN-NEXT: v_writelane_b32 v1, s7, 19
-; GCN-NEXT: v_writelane_b32 v1, s8, 20
-; GCN-NEXT: v_writelane_b32 v1, s9, 21
-; GCN-NEXT: v_writelane_b32 v1, s10, 22
-; GCN-NEXT: v_writelane_b32 v1, s11, 23
+; GCN-NEXT: v_writelane_b32 v2, s4, 16
+; GCN-NEXT: v_writelane_b32 v2, s5, 17
+; GCN-NEXT: v_writelane_b32 v2, s6, 18
+; GCN-NEXT: v_writelane_b32 v2, s7, 19
+; GCN-NEXT: v_writelane_b32 v2, s8, 20
+; GCN-NEXT: v_writelane_b32 v2, s9, 21
+; GCN-NEXT: v_writelane_b32 v2, s10, 22
+; GCN-NEXT: v_writelane_b32 v2, s11, 23
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 24
-; GCN-NEXT: v_writelane_b32 v1, s5, 25
-; GCN-NEXT: v_writelane_b32 v1, s6, 26
-; GCN-NEXT: v_writelane_b32 v1, s7, 27
-; GCN-NEXT: v_writelane_b32 v1, s8, 28
-; GCN-NEXT: v_writelane_b32 v1, s9, 29
-; GCN-NEXT: v_writelane_b32 v1, s10, 30
-; GCN-NEXT: v_writelane_b32 v1, s11, 31
+; GCN-NEXT: v_writelane_b32 v2, s4, 24
+; GCN-NEXT: v_writelane_b32 v2, s5, 25
+; GCN-NEXT: v_writelane_b32 v2, s6, 26
+; GCN-NEXT: v_writelane_b32 v2, s7, 27
+; GCN-NEXT: v_writelane_b32 v2, s8, 28
+; GCN-NEXT: v_writelane_b32 v2, s9, 29
+; GCN-NEXT: v_writelane_b32 v2, s10, 30
+; GCN-NEXT: v_writelane_b32 v2, s11, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 32
-; GCN-NEXT: v_writelane_b32 v1, s5, 33
-; GCN-NEXT: v_writelane_b32 v1, s6, 34
-; GCN-NEXT: v_writelane_b32 v1, s7, 35
-; GCN-NEXT: v_writelane_b32 v1, s8, 36
-; GCN-NEXT: v_writelane_b32 v1, s9, 37
-; GCN-NEXT: v_writelane_b32 v1, s10, 38
-; GCN-NEXT: v_writelane_b32 v1, s11, 39
+; GCN-NEXT: v_writelane_b32 v2, s4, 32
+; GCN-NEXT: v_writelane_b32 v2, s5, 33
+; GCN-NEXT: v_writelane_b32 v2, s6, 34
+; GCN-NEXT: v_writelane_b32 v2, s7, 35
+; GCN-NEXT: v_writelane_b32 v2, s8, 36
+; GCN-NEXT: v_writelane_b32 v2, s9, 37
+; GCN-NEXT: v_writelane_b32 v2, s10, 38
+; GCN-NEXT: v_writelane_b32 v2, s11, 39
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 40
-; GCN-NEXT: v_writelane_b32 v1, s5, 41
-; GCN-NEXT: v_writelane_b32 v1, s6, 42
-; GCN-NEXT: v_writelane_b32 v1, s7, 43
-; GCN-NEXT: v_writelane_b32 v1, s8, 44
-; GCN-NEXT: v_writelane_b32 v1, s9, 45
-; GCN-NEXT: v_writelane_b32 v1, s10, 46
-; GCN-NEXT: v_writelane_b32 v1, s11, 47
+; GCN-NEXT: v_writelane_b32 v2, s4, 40
+; GCN-NEXT: v_writelane_b32 v2, s5, 41
+; GCN-NEXT: v_writelane_b32 v2, s6, 42
+; GCN-NEXT: v_writelane_b32 v2, s7, 43
+; GCN-NEXT: v_writelane_b32 v2, s8, 44
+; GCN-NEXT: v_writelane_b32 v2, s9, 45
+; GCN-NEXT: v_writelane_b32 v2, s10, 46
+; GCN-NEXT: v_writelane_b32 v2, s11, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 48
-; GCN-NEXT: v_writelane_b32 v1, s5, 49
-; GCN-NEXT: v_writelane_b32 v1, s6, 50
-; GCN-NEXT: v_writelane_b32 v1, s7, 51
-; GCN-NEXT: v_writelane_b32 v1, s8, 52
-; GCN-NEXT: v_writelane_b32 v1, s9, 53
-; GCN-NEXT: v_writelane_b32 v1, s10, 54
-; GCN-NEXT: v_writelane_b32 v1, s11, 55
+; GCN-NEXT: v_writelane_b32 v2, s4, 48
+; GCN-NEXT: v_writelane_b32 v2, s5, 49
+; GCN-NEXT: v_writelane_b32 v2, s6, 50
+; GCN-NEXT: v_writelane_b32 v2, s7, 51
+; GCN-NEXT: v_writelane_b32 v2, s8, 52
+; GCN-NEXT: v_writelane_b32 v2, s9, 53
+; GCN-NEXT: v_writelane_b32 v2, s10, 54
+; GCN-NEXT: v_writelane_b32 v2, s11, 55
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 56
-; GCN-NEXT: v_writelane_b32 v1, s5, 57
-; GCN-NEXT: v_writelane_b32 v1, s6, 58
-; GCN-NEXT: v_writelane_b32 v1, s7, 59
-; GCN-NEXT: v_writelane_b32 v1, s8, 60
-; GCN-NEXT: v_writelane_b32 v1, s9, 61
-; GCN-NEXT: v_writelane_b32 v1, s10, 62
-; GCN-NEXT: v_writelane_b32 v1, s11, 63
+; GCN-NEXT: v_writelane_b32 v2, s4, 56
+; GCN-NEXT: v_writelane_b32 v2, s5, 57
+; GCN-NEXT: v_writelane_b32 v2, s6, 58
+; GCN-NEXT: v_writelane_b32 v2, s7, 59
+; GCN-NEXT: v_writelane_b32 v2, s8, 60
+; GCN-NEXT: v_writelane_b32 v2, s9, 61
+; GCN-NEXT: v_writelane_b32 v2, s10, 62
+; GCN-NEXT: v_writelane_b32 v2, s11, 63
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_store_dword v1, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 0
-; GCN-NEXT: v_writelane_b32 v0, s5, 1
-; GCN-NEXT: v_writelane_b32 v0, s6, 2
-; GCN-NEXT: v_writelane_b32 v0, s7, 3
-; GCN-NEXT: v_writelane_b32 v0, s8, 4
-; GCN-NEXT: v_writelane_b32 v0, s9, 5
-; GCN-NEXT: v_writelane_b32 v0, s10, 6
-; GCN-NEXT: v_writelane_b32 v0, s11, 7
+; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v2, s4, 0
+; GCN-NEXT: v_writelane_b32 v2, s5, 1
+; GCN-NEXT: v_writelane_b32 v2, s6, 2
+; GCN-NEXT: v_writelane_b32 v2, s7, 3
+; GCN-NEXT: v_writelane_b32 v2, s8, 4
+; GCN-NEXT: v_writelane_b32 v2, s9, 5
+; GCN-NEXT: v_writelane_b32 v2, s10, 6
+; GCN-NEXT: v_writelane_b32 v2, s11, 7
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[92:95], 0 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -223,76 +223,76 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_readlane_b32 s8, v2, 56
-; GCN-NEXT: v_readlane_b32 s9, v2, 57
-; GCN-NEXT: v_readlane_b32 s10, v2, 58
-; GCN-NEXT: v_readlane_b32 s11, v2, 59
-; GCN-NEXT: v_readlane_b32 s12, v2, 60
-; GCN-NEXT: v_readlane_b32 s13, v2, 61
-; GCN-NEXT: v_readlane_b32 s14, v2, 62
-; GCN-NEXT: v_readlane_b32 s15, v2, 63
-; GCN-NEXT: v_readlane_b32 s16, v2, 48
-; GCN-NEXT: v_readlane_b32 s17, v2, 49
-; GCN-NEXT: v_readlane_b32 s18, v2, 50
-; GCN-NEXT: v_readlane_b32 s19, v2, 51
-; GCN-NEXT: v_readlane_b32 s20, v2, 52
-; GCN-NEXT: v_readlane_b32 s21, v2, 53
-; GCN-NEXT: v_readlane_b32 s22, v2, 54
-; GCN-NEXT: v_readlane_b32 s23, v2, 55
-; GCN-NEXT: v_readlane_b32 s24, v2, 40
-; GCN-NEXT: v_readlane_b32 s25, v2, 41
-; GCN-NEXT: v_readlane_b32 s26, v2, 42
-; GCN-NEXT: v_readlane_b32 s27, v2, 43
-; GCN-NEXT: v_readlane_b32 s28, v2, 44
-; GCN-NEXT: v_readlane_b32 s29, v2, 45
-; GCN-NEXT: v_readlane_b32 s30, v2, 46
-; GCN-NEXT: v_readlane_b32 s31, v2, 47
-; GCN-NEXT: v_readlane_b32 s36, v2, 32
-; GCN-NEXT: v_readlane_b32 s37, v2, 33
-; GCN-NEXT: v_readlane_b32 s38, v2, 34
-; GCN-NEXT: v_readlane_b32 s39, v2, 35
-; GCN-NEXT: v_readlane_b32 s40, v2, 36
-; GCN-NEXT: v_readlane_b32 s41, v2, 37
-; GCN-NEXT: v_readlane_b32 s42, v2, 38
-; GCN-NEXT: v_readlane_b32 s43, v2, 39
-; GCN-NEXT: v_readlane_b32 s44, v2, 24
-; GCN-NEXT: v_readlane_b32 s45, v2, 25
-; GCN-NEXT: v_readlane_b32 s46, v2, 26
-; GCN-NEXT: v_readlane_b32 s47, v2, 27
-; GCN-NEXT: v_readlane_b32 s48, v2, 28
-; GCN-NEXT: v_readlane_b32 s49, v2, 29
-; GCN-NEXT: v_readlane_b32 s50, v2, 30
-; GCN-NEXT: v_readlane_b32 s51, v2, 31
-; GCN-NEXT: v_readlane_b32 s52, v2, 16
-; GCN-NEXT: v_readlane_b32 s53, v2, 17
-; GCN-NEXT: v_readlane_b32 s54, v2, 18
-; GCN-NEXT: v_readlane_b32 s55, v2, 19
-; GCN-NEXT: v_readlane_b32 s56, v2, 20
-; GCN-NEXT: v_readlane_b32 s57, v2, 21
-; GCN-NEXT: v_readlane_b32 s58, v2, 22
-; GCN-NEXT: v_readlane_b32 s59, v2, 23
-; GCN-NEXT: v_readlane_b32 s60, v2, 8
-; GCN-NEXT: v_readlane_b32 s61, v2, 9
-; GCN-NEXT: v_readlane_b32 s62, v2, 10
-; GCN-NEXT: v_readlane_b32 s63, v2, 11
-; GCN-NEXT: v_readlane_b32 s64, v2, 12
-; GCN-NEXT: v_readlane_b32 s65, v2, 13
-; GCN-NEXT: v_readlane_b32 s66, v2, 14
-; GCN-NEXT: v_readlane_b32 s67, v2, 15
-; GCN-NEXT: v_readlane_b32 s68, v2, 0
-; GCN-NEXT: v_readlane_b32 s69, v2, 1
-; GCN-NEXT: v_readlane_b32 s70, v2, 2
-; GCN-NEXT: v_readlane_b32 s71, v2, 3
-; GCN-NEXT: v_readlane_b32 s72, v2, 4
-; GCN-NEXT: v_readlane_b32 s73, v2, 5
-; GCN-NEXT: v_readlane_b32 s74, v2, 6
-; GCN-NEXT: v_readlane_b32 s75, v2, 7
+; GCN-NEXT: v_readlane_b32 s8, v0, 56
+; GCN-NEXT: v_readlane_b32 s9, v0, 57
+; GCN-NEXT: v_readlane_b32 s10, v0, 58
+; GCN-NEXT: v_readlane_b32 s11, v0, 59
+; GCN-NEXT: v_readlane_b32 s12, v0, 60
+; GCN-NEXT: v_readlane_b32 s13, v0, 61
+; GCN-NEXT: v_readlane_b32 s14, v0, 62
+; GCN-NEXT: v_readlane_b32 s15, v0, 63
+; GCN-NEXT: v_readlane_b32 s16, v0, 48
+; GCN-NEXT: v_readlane_b32 s17, v0, 49
+; GCN-NEXT: v_readlane_b32 s18, v0, 50
+; GCN-NEXT: v_readlane_b32 s19, v0, 51
+; GCN-NEXT: v_readlane_b32 s20, v0, 52
+; GCN-NEXT: v_readlane_b32 s21, v0, 53
+; GCN-NEXT: v_readlane_b32 s22, v0, 54
+; GCN-NEXT: v_readlane_b32 s23, v0, 55
+; GCN-NEXT: v_readlane_b32 s24, v0, 40
+; GCN-NEXT: v_readlane_b32 s25, v0, 41
+; GCN-NEXT: v_readlane_b32 s26, v0, 42
+; GCN-NEXT: v_readlane_b32 s27, v0, 43
+; GCN-NEXT: v_readlane_b32 s28, v0, 44
+; GCN-NEXT: v_readlane_b32 s29, v0, 45
+; GCN-NEXT: v_readlane_b32 s30, v0, 46
+; GCN-NEXT: v_readlane_b32 s31, v0, 47
+; GCN-NEXT: v_readlane_b32 s36, v0, 32
+; GCN-NEXT: v_readlane_b32 s37, v0, 33
+; GCN-NEXT: v_readlane_b32 s38, v0, 34
+; GCN-NEXT: v_readlane_b32 s39, v0, 35
+; GCN-NEXT: v_readlane_b32 s40, v0, 36
+; GCN-NEXT: v_readlane_b32 s41, v0, 37
+; GCN-NEXT: v_readlane_b32 s42, v0, 38
+; GCN-NEXT: v_readlane_b32 s43, v0, 39
+; GCN-NEXT: v_readlane_b32 s44, v0, 24
+; GCN-NEXT: v_readlane_b32 s45, v0, 25
+; GCN-NEXT: v_readlane_b32 s46, v0, 26
+; GCN-NEXT: v_readlane_b32 s47, v0, 27
+; GCN-NEXT: v_readlane_b32 s48, v0, 28
+; GCN-NEXT: v_readlane_b32 s49, v0, 29
+; GCN-NEXT: v_readlane_b32 s50, v0, 30
+; GCN-NEXT: v_readlane_b32 s51, v0, 31
+; GCN-NEXT: v_readlane_b32 s52, v0, 16
+; GCN-NEXT: v_readlane_b32 s53, v0, 17
+; GCN-NEXT: v_readlane_b32 s54, v0, 18
+; GCN-NEXT: v_readlane_b32 s55, v0, 19
+; GCN-NEXT: v_readlane_b32 s56, v0, 20
+; GCN-NEXT: v_readlane_b32 s57, v0, 21
+; GCN-NEXT: v_readlane_b32 s58, v0, 22
+; GCN-NEXT: v_readlane_b32 s59, v0, 23
+; GCN-NEXT: v_readlane_b32 s60, v0, 8
+; GCN-NEXT: v_readlane_b32 s61, v0, 9
+; GCN-NEXT: v_readlane_b32 s62, v0, 10
+; GCN-NEXT: v_readlane_b32 s63, v0, 11
+; GCN-NEXT: v_readlane_b32 s64, v0, 12
+; GCN-NEXT: v_readlane_b32 s65, v0, 13
+; GCN-NEXT: v_readlane_b32 s66, v0, 14
+; GCN-NEXT: v_readlane_b32 s67, v0, 15
+; GCN-NEXT: v_readlane_b32 s68, v0, 0
+; GCN-NEXT: v_readlane_b32 s69, v0, 1
+; GCN-NEXT: v_readlane_b32 s70, v0, 2
+; GCN-NEXT: v_readlane_b32 s71, v0, 3
+; GCN-NEXT: v_readlane_b32 s72, v0, 4
+; GCN-NEXT: v_readlane_b32 s73, v0, 5
+; GCN-NEXT: v_readlane_b32 s74, v0, 6
+; GCN-NEXT: v_readlane_b32 s75, v0, 7
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s76, v1, 56
; GCN-NEXT: v_readlane_b32 s77, v1, 57
@@ -319,7 +319,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: v_readlane_b32 s6, v1, 6
; GCN-NEXT: v_readlane_b32 s7, v1, 7
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:7]
@@ -380,14 +380,14 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_readlane_b32 s2, v0, 2
-; GCN-NEXT: v_readlane_b32 s3, v0, 3
-; GCN-NEXT: v_readlane_b32 s4, v0, 4
-; GCN-NEXT: v_readlane_b32 s5, v0, 5
-; GCN-NEXT: v_readlane_b32 s6, v0, 6
-; GCN-NEXT: v_readlane_b32 s7, v0, 7
+; GCN-NEXT: v_readlane_b32 s0, v2, 0
+; GCN-NEXT: v_readlane_b32 s1, v2, 1
+; GCN-NEXT: v_readlane_b32 s2, v2, 2
+; GCN-NEXT: v_readlane_b32 s3, v2, 3
+; GCN-NEXT: v_readlane_b32 s4, v2, 4
+; GCN-NEXT: v_readlane_b32 s5, v2, 5
+; GCN-NEXT: v_readlane_b32 s6, v2, 6
+; GCN-NEXT: v_readlane_b32 s7, v2, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[84:91]
; GCN-NEXT: ;;#ASMEND
@@ -422,18 +422,6 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out,
; GCN-NEXT: ; use s[0:7]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB0_2: ; %ret
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: ; kill: killed $vgpr2
-; GCN-NEXT: ; kill: killed $vgpr1
-; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
%wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
@@ -490,12 +478,11 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s9
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[2:3], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GCN-NEXT: v_writelane_b32 v1, s4, 0
; GCN-NEXT: v_writelane_b32 v1, s5, 1
; GCN-NEXT: v_writelane_b32 v1, s6, 2
@@ -575,21 +562,22 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:11]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s4, 0
-; GCN-NEXT: v_writelane_b32 v0, s5, 1
-; GCN-NEXT: v_writelane_b32 v0, s6, 2
-; GCN-NEXT: v_writelane_b32 v0, s7, 3
-; GCN-NEXT: v_writelane_b32 v0, s8, 4
-; GCN-NEXT: v_writelane_b32 v0, s9, 5
-; GCN-NEXT: v_writelane_b32 v0, s10, 6
-; GCN-NEXT: v_writelane_b32 v0, s11, 7
+; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v1, s4, 0
+; GCN-NEXT: v_writelane_b32 v1, s5, 1
+; GCN-NEXT: v_writelane_b32 v1, s6, 2
+; GCN-NEXT: v_writelane_b32 v1, s7, 3
+; GCN-NEXT: v_writelane_b32 v1, s8, 4
+; GCN-NEXT: v_writelane_b32 v1, s9, 5
+; GCN-NEXT: v_writelane_b32 v1, s10, 6
+; GCN-NEXT: v_writelane_b32 v1, s11, 7
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[2:3]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s2, 8
-; GCN-NEXT: v_writelane_b32 v0, s3, 9
+; GCN-NEXT: v_writelane_b32 v1, s2, 8
+; GCN-NEXT: v_writelane_b32 v1, s3, 9
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -597,93 +585,93 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_readlane_b32 s16, v1, 8
-; GCN-NEXT: v_readlane_b32 s17, v1, 9
-; GCN-NEXT: v_readlane_b32 s20, v1, 0
-; GCN-NEXT: v_readlane_b32 s21, v1, 1
-; GCN-NEXT: v_readlane_b32 s22, v1, 2
-; GCN-NEXT: v_readlane_b32 s23, v1, 3
-; GCN-NEXT: v_readlane_b32 s24, v1, 4
-; GCN-NEXT: v_readlane_b32 s25, v1, 5
-; GCN-NEXT: v_readlane_b32 s26, v1, 6
-; GCN-NEXT: v_readlane_b32 s27, v1, 7
+; GCN-NEXT: v_readlane_b32 s16, v0, 8
+; GCN-NEXT: v_readlane_b32 s17, v0, 9
+; GCN-NEXT: v_readlane_b32 s20, v0, 0
+; GCN-NEXT: v_readlane_b32 s21, v0, 1
+; GCN-NEXT: v_readlane_b32 s22, v0, 2
+; GCN-NEXT: v_readlane_b32 s23, v0, 3
+; GCN-NEXT: v_readlane_b32 s24, v0, 4
+; GCN-NEXT: v_readlane_b32 s25, v0, 5
+; GCN-NEXT: v_readlane_b32 s26, v0, 6
+; GCN-NEXT: v_readlane_b32 s27, v0, 7
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s36, v0, 32
-; GCN-NEXT: v_readlane_b32 s37, v0, 33
-; GCN-NEXT: v_readlane_b32 s38, v0, 34
-; GCN-NEXT: v_readlane_b32 s39, v0, 35
-; GCN-NEXT: v_readlane_b32 s40, v0, 36
-; GCN-NEXT: v_readlane_b32 s41, v0, 37
-; GCN-NEXT: v_readlane_b32 s42, v0, 38
-; GCN-NEXT: v_readlane_b32 s43, v0, 39
-; GCN-NEXT: v_readlane_b32 s44, v0, 40
-; GCN-NEXT: v_readlane_b32 s45, v0, 41
-; GCN-NEXT: v_readlane_b32 s46, v0, 42
-; GCN-NEXT: v_readlane_b32 s47, v0, 43
-; GCN-NEXT: v_readlane_b32 s48, v0, 44
-; GCN-NEXT: v_readlane_b32 s49, v0, 45
-; GCN-NEXT: v_readlane_b32 s50, v0, 46
-; GCN-NEXT: v_readlane_b32 s51, v0, 47
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
-; GCN-NEXT: v_readlane_b32 s2, v0, 2
-; GCN-NEXT: v_readlane_b32 s3, v0, 3
-; GCN-NEXT: v_readlane_b32 s4, v0, 4
-; GCN-NEXT: v_readlane_b32 s5, v0, 5
-; GCN-NEXT: v_readlane_b32 s6, v0, 6
-; GCN-NEXT: v_readlane_b32 s7, v0, 7
-; GCN-NEXT: v_readlane_b32 s8, v0, 8
-; GCN-NEXT: v_readlane_b32 s9, v0, 9
-; GCN-NEXT: v_readlane_b32 s10, v0, 10
-; GCN-NEXT: v_readlane_b32 s11, v0, 11
-; GCN-NEXT: v_readlane_b32 s12, v0, 12
-; GCN-NEXT: v_readlane_b32 s13, v0, 13
-; GCN-NEXT: v_readlane_b32 s14, v0, 14
-; GCN-NEXT: v_readlane_b32 s15, v0, 15
+; GCN-NEXT: v_readlane_b32 s36, v1, 32
+; GCN-NEXT: v_readlane_b32 s37, v1, 33
+; GCN-NEXT: v_readlane_b32 s38, v1, 34
+; GCN-NEXT: v_readlane_b32 s39, v1, 35
+; GCN-NEXT: v_readlane_b32 s40, v1, 36
+; GCN-NEXT: v_readlane_b32 s41, v1, 37
+; GCN-NEXT: v_readlane_b32 s42, v1, 38
+; GCN-NEXT: v_readlane_b32 s43, v1, 39
+; GCN-NEXT: v_readlane_b32 s44, v1, 40
+; GCN-NEXT: v_readlane_b32 s45, v1, 41
+; GCN-NEXT: v_readlane_b32 s46, v1, 42
+; GCN-NEXT: v_readlane_b32 s47, v1, 43
+; GCN-NEXT: v_readlane_b32 s48, v1, 44
+; GCN-NEXT: v_readlane_b32 s49, v1, 45
+; GCN-NEXT: v_readlane_b32 s50, v1, 46
+; GCN-NEXT: v_readlane_b32 s51, v1, 47
+; GCN-NEXT: v_readlane_b32 s0, v1, 0
+; GCN-NEXT: v_readlane_b32 s1, v1, 1
+; GCN-NEXT: v_readlane_b32 s2, v1, 2
+; GCN-NEXT: v_readlane_b32 s3, v1, 3
+; GCN-NEXT: v_readlane_b32 s4, v1, 4
+; GCN-NEXT: v_readlane_b32 s5, v1, 5
+; GCN-NEXT: v_readlane_b32 s6, v1, 6
+; GCN-NEXT: v_readlane_b32 s7, v1, 7
+; GCN-NEXT: v_readlane_b32 s8, v1, 8
+; GCN-NEXT: v_readlane_b32 s9, v1, 9
+; GCN-NEXT: v_readlane_b32 s10, v1, 10
+; GCN-NEXT: v_readlane_b32 s11, v1, 11
+; GCN-NEXT: v_readlane_b32 s12, v1, 12
+; GCN-NEXT: v_readlane_b32 s13, v1, 13
+; GCN-NEXT: v_readlane_b32 s14, v1, 14
+; GCN-NEXT: v_readlane_b32 s15, v1, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 16
-; GCN-NEXT: v_readlane_b32 s1, v0, 17
-; GCN-NEXT: v_readlane_b32 s2, v0, 18
-; GCN-NEXT: v_readlane_b32 s3, v0, 19
-; GCN-NEXT: v_readlane_b32 s4, v0, 20
-; GCN-NEXT: v_readlane_b32 s5, v0, 21
-; GCN-NEXT: v_readlane_b32 s6, v0, 22
-; GCN-NEXT: v_readlane_b32 s7, v0, 23
-; GCN-NEXT: v_readlane_b32 s8, v0, 24
-; GCN-NEXT: v_readlane_b32 s9, v0, 25
-; GCN-NEXT: v_readlane_b32 s10, v0, 26
-; GCN-NEXT: v_readlane_b32 s11, v0, 27
-; GCN-NEXT: v_readlane_b32 s12, v0, 28
-; GCN-NEXT: v_readlane_b32 s13, v0, 29
-; GCN-NEXT: v_readlane_b32 s14, v0, 30
-; GCN-NEXT: v_readlane_b32 s15, v0, 31
+; GCN-NEXT: v_readlane_b32 s0, v1, 16
+; GCN-NEXT: v_readlane_b32 s1, v1, 17
+; GCN-NEXT: v_readlane_b32 s2, v1, 18
+; GCN-NEXT: v_readlane_b32 s3, v1, 19
+; GCN-NEXT: v_readlane_b32 s4, v1, 20
+; GCN-NEXT: v_readlane_b32 s5, v1, 21
+; GCN-NEXT: v_readlane_b32 s6, v1, 22
+; GCN-NEXT: v_readlane_b32 s7, v1, 23
+; GCN-NEXT: v_readlane_b32 s8, v1, 24
+; GCN-NEXT: v_readlane_b32 s9, v1, 25
+; GCN-NEXT: v_readlane_b32 s10, v1, 26
+; GCN-NEXT: v_readlane_b32 s11, v1, 27
+; GCN-NEXT: v_readlane_b32 s12, v1, 28
+; GCN-NEXT: v_readlane_b32 s13, v1, 29
+; GCN-NEXT: v_readlane_b32 s14, v1, 30
+; GCN-NEXT: v_readlane_b32 s15, v1, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s0, v0, 48
-; GCN-NEXT: v_readlane_b32 s1, v0, 49
-; GCN-NEXT: v_readlane_b32 s2, v0, 50
-; GCN-NEXT: v_readlane_b32 s3, v0, 51
-; GCN-NEXT: v_readlane_b32 s4, v0, 52
-; GCN-NEXT: v_readlane_b32 s5, v0, 53
-; GCN-NEXT: v_readlane_b32 s6, v0, 54
-; GCN-NEXT: v_readlane_b32 s7, v0, 55
-; GCN-NEXT: v_readlane_b32 s8, v0, 56
-; GCN-NEXT: v_readlane_b32 s9, v0, 57
-; GCN-NEXT: v_readlane_b32 s10, v0, 58
-; GCN-NEXT: v_readlane_b32 s11, v0, 59
-; GCN-NEXT: v_readlane_b32 s12, v0, 60
-; GCN-NEXT: v_readlane_b32 s13, v0, 61
-; GCN-NEXT: v_readlane_b32 s14, v0, 62
-; GCN-NEXT: v_readlane_b32 s15, v0, 63
+; GCN-NEXT: v_readlane_b32 s0, v1, 48
+; GCN-NEXT: v_readlane_b32 s1, v1, 49
+; GCN-NEXT: v_readlane_b32 s2, v1, 50
+; GCN-NEXT: v_readlane_b32 s3, v1, 51
+; GCN-NEXT: v_readlane_b32 s4, v1, 52
+; GCN-NEXT: v_readlane_b32 s5, v1, 53
+; GCN-NEXT: v_readlane_b32 s6, v1, 54
+; GCN-NEXT: v_readlane_b32 s7, v1, 55
+; GCN-NEXT: v_readlane_b32 s8, v1, 56
+; GCN-NEXT: v_readlane_b32 s9, v1, 57
+; GCN-NEXT: v_readlane_b32 s10, v1, 58
+; GCN-NEXT: v_readlane_b32 s11, v1, 59
+; GCN-NEXT: v_readlane_b32 s12, v1, 60
+; GCN-NEXT: v_readlane_b32 s13, v1, 61
+; GCN-NEXT: v_readlane_b32 s14, v1, 62
+; GCN-NEXT: v_readlane_b32 s15, v1, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[36:51]
; GCN-NEXT: ;;#ASMEND
@@ -697,14 +685,6 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB1_2: ; %ret
-; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[28:29]
-; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[28:29]
-; GCN-NEXT: ; kill: killed $vgpr1
-; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
%wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
@@ -741,17 +721,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s9
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[2:3], 0xb
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
@@ -765,91 +737,91 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_writelane_b32 v1, s4, 0
-; GCN-NEXT: v_writelane_b32 v1, s5, 1
-; GCN-NEXT: v_writelane_b32 v1, s6, 2
-; GCN-NEXT: v_writelane_b32 v1, s7, 3
-; GCN-NEXT: v_writelane_b32 v1, s8, 4
-; GCN-NEXT: v_writelane_b32 v1, s9, 5
-; GCN-NEXT: v_writelane_b32 v1, s10, 6
-; GCN-NEXT: v_writelane_b32 v1, s11, 7
-; GCN-NEXT: v_writelane_b32 v1, s12, 8
-; GCN-NEXT: v_writelane_b32 v1, s13, 9
-; GCN-NEXT: v_writelane_b32 v1, s14, 10
-; GCN-NEXT: v_writelane_b32 v1, s15, 11
-; GCN-NEXT: v_writelane_b32 v1, s16, 12
-; GCN-NEXT: v_writelane_b32 v1, s17, 13
-; GCN-NEXT: v_writelane_b32 v1, s18, 14
-; GCN-NEXT: v_writelane_b32 v1, s19, 15
+; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v32, s4, 0
+; GCN-NEXT: v_writelane_b32 v32, s5, 1
+; GCN-NEXT: v_writelane_b32 v32, s6, 2
+; GCN-NEXT: v_writelane_b32 v32, s7, 3
+; GCN-NEXT: v_writelane_b32 v32, s8, 4
+; GCN-NEXT: v_writelane_b32 v32, s9, 5
+; GCN-NEXT: v_writelane_b32 v32, s10, 6
+; GCN-NEXT: v_writelane_b32 v32, s11, 7
+; GCN-NEXT: v_writelane_b32 v32, s12, 8
+; GCN-NEXT: v_writelane_b32 v32, s13, 9
+; GCN-NEXT: v_writelane_b32 v32, s14, 10
+; GCN-NEXT: v_writelane_b32 v32, s15, 11
+; GCN-NEXT: v_writelane_b32 v32, s16, 12
+; GCN-NEXT: v_writelane_b32 v32, s17, 13
+; GCN-NEXT: v_writelane_b32 v32, s18, 14
+; GCN-NEXT: v_writelane_b32 v32, s19, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 16
-; GCN-NEXT: v_writelane_b32 v1, s5, 17
-; GCN-NEXT: v_writelane_b32 v1, s6, 18
-; GCN-NEXT: v_writelane_b32 v1, s7, 19
-; GCN-NEXT: v_writelane_b32 v1, s8, 20
-; GCN-NEXT: v_writelane_b32 v1, s9, 21
-; GCN-NEXT: v_writelane_b32 v1, s10, 22
-; GCN-NEXT: v_writelane_b32 v1, s11, 23
-; GCN-NEXT: v_writelane_b32 v1, s12, 24
-; GCN-NEXT: v_writelane_b32 v1, s13, 25
-; GCN-NEXT: v_writelane_b32 v1, s14, 26
-; GCN-NEXT: v_writelane_b32 v1, s15, 27
-; GCN-NEXT: v_writelane_b32 v1, s16, 28
-; GCN-NEXT: v_writelane_b32 v1, s17, 29
-; GCN-NEXT: v_writelane_b32 v1, s18, 30
-; GCN-NEXT: v_writelane_b32 v1, s19, 31
+; GCN-NEXT: v_writelane_b32 v32, s4, 16
+; GCN-NEXT: v_writelane_b32 v32, s5, 17
+; GCN-NEXT: v_writelane_b32 v32, s6, 18
+; GCN-NEXT: v_writelane_b32 v32, s7, 19
+; GCN-NEXT: v_writelane_b32 v32, s8, 20
+; GCN-NEXT: v_writelane_b32 v32, s9, 21
+; GCN-NEXT: v_writelane_b32 v32, s10, 22
+; GCN-NEXT: v_writelane_b32 v32, s11, 23
+; GCN-NEXT: v_writelane_b32 v32, s12, 24
+; GCN-NEXT: v_writelane_b32 v32, s13, 25
+; GCN-NEXT: v_writelane_b32 v32, s14, 26
+; GCN-NEXT: v_writelane_b32 v32, s15, 27
+; GCN-NEXT: v_writelane_b32 v32, s16, 28
+; GCN-NEXT: v_writelane_b32 v32, s17, 29
+; GCN-NEXT: v_writelane_b32 v32, s18, 30
+; GCN-NEXT: v_writelane_b32 v32, s19, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 32
-; GCN-NEXT: v_writelane_b32 v1, s5, 33
-; GCN-NEXT: v_writelane_b32 v1, s6, 34
-; GCN-NEXT: v_writelane_b32 v1, s7, 35
-; GCN-NEXT: v_writelane_b32 v1, s8, 36
-; GCN-NEXT: v_writelane_b32 v1, s9, 37
-; GCN-NEXT: v_writelane_b32 v1, s10, 38
-; GCN-NEXT: v_writelane_b32 v1, s11, 39
-; GCN-NEXT: v_writelane_b32 v1, s12, 40
-; GCN-NEXT: v_writelane_b32 v1, s13, 41
-; GCN-NEXT: v_writelane_b32 v1, s14, 42
-; GCN-NEXT: v_writelane_b32 v1, s15, 43
-; GCN-NEXT: v_writelane_b32 v1, s16, 44
-; GCN-NEXT: v_writelane_b32 v1, s17, 45
-; GCN-NEXT: v_writelane_b32 v1, s18, 46
-; GCN-NEXT: v_writelane_b32 v1, s19, 47
+; GCN-NEXT: v_writelane_b32 v32, s4, 32
+; GCN-NEXT: v_writelane_b32 v32, s5, 33
+; GCN-NEXT: v_writelane_b32 v32, s6, 34
+; GCN-NEXT: v_writelane_b32 v32, s7, 35
+; GCN-NEXT: v_writelane_b32 v32, s8, 36
+; GCN-NEXT: v_writelane_b32 v32, s9, 37
+; GCN-NEXT: v_writelane_b32 v32, s10, 38
+; GCN-NEXT: v_writelane_b32 v32, s11, 39
+; GCN-NEXT: v_writelane_b32 v32, s12, 40
+; GCN-NEXT: v_writelane_b32 v32, s13, 41
+; GCN-NEXT: v_writelane_b32 v32, s14, 42
+; GCN-NEXT: v_writelane_b32 v32, s15, 43
+; GCN-NEXT: v_writelane_b32 v32, s16, 44
+; GCN-NEXT: v_writelane_b32 v32, s17, 45
+; GCN-NEXT: v_writelane_b32 v32, s18, 46
+; GCN-NEXT: v_writelane_b32 v32, s19, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 48
-; GCN-NEXT: v_writelane_b32 v1, s5, 49
-; GCN-NEXT: v_writelane_b32 v1, s6, 50
-; GCN-NEXT: v_writelane_b32 v1, s7, 51
-; GCN-NEXT: v_writelane_b32 v1, s8, 52
-; GCN-NEXT: v_writelane_b32 v1, s9, 53
-; GCN-NEXT: v_writelane_b32 v1, s10, 54
-; GCN-NEXT: v_writelane_b32 v1, s11, 55
-; GCN-NEXT: v_writelane_b32 v1, s12, 56
-; GCN-NEXT: v_writelane_b32 v1, s13, 57
-; GCN-NEXT: v_writelane_b32 v1, s14, 58
-; GCN-NEXT: v_writelane_b32 v1, s15, 59
-; GCN-NEXT: v_writelane_b32 v1, s16, 60
-; GCN-NEXT: v_writelane_b32 v1, s17, 61
-; GCN-NEXT: v_writelane_b32 v1, s18, 62
-; GCN-NEXT: v_writelane_b32 v1, s19, 63
+; GCN-NEXT: v_writelane_b32 v32, s4, 48
+; GCN-NEXT: v_writelane_b32 v32, s5, 49
+; GCN-NEXT: v_writelane_b32 v32, s6, 50
+; GCN-NEXT: v_writelane_b32 v32, s7, 51
+; GCN-NEXT: v_writelane_b32 v32, s8, 52
+; GCN-NEXT: v_writelane_b32 v32, s9, 53
+; GCN-NEXT: v_writelane_b32 v32, s10, 54
+; GCN-NEXT: v_writelane_b32 v32, s11, 55
+; GCN-NEXT: v_writelane_b32 v32, s12, 56
+; GCN-NEXT: v_writelane_b32 v32, s13, 57
+; GCN-NEXT: v_writelane_b32 v32, s14, 58
+; GCN-NEXT: v_writelane_b32 v32, s15, 59
+; GCN-NEXT: v_writelane_b32 v32, s16, 60
+; GCN-NEXT: v_writelane_b32 v32, s17, 61
+; GCN-NEXT: v_writelane_b32 v32, s18, 62
+; GCN-NEXT: v_writelane_b32 v32, s19, 63
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[2:3]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_writelane_b32 v0, s2, 0
-; GCN-NEXT: v_writelane_b32 v0, s3, 1
+; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v32, s2, 0
+; GCN-NEXT: v_writelane_b32 v32, s3, 1
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -857,59 +829,59 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: s_cbranch_scc1 .LBB2_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s36, v1, 32
-; GCN-NEXT: v_readlane_b32 s37, v1, 33
-; GCN-NEXT: v_readlane_b32 s38, v1, 34
-; GCN-NEXT: v_readlane_b32 s39, v1, 35
-; GCN-NEXT: v_readlane_b32 s40, v1, 36
-; GCN-NEXT: v_readlane_b32 s41, v1, 37
-; GCN-NEXT: v_readlane_b32 s42, v1, 38
-; GCN-NEXT: v_readlane_b32 s43, v1, 39
-; GCN-NEXT: v_readlane_b32 s44, v1, 40
-; GCN-NEXT: v_readlane_b32 s45, v1, 41
-; GCN-NEXT: v_readlane_b32 s46, v1, 42
-; GCN-NEXT: v_readlane_b32 s47, v1, 43
-; GCN-NEXT: v_readlane_b32 s48, v1, 44
-; GCN-NEXT: v_readlane_b32 s49, v1, 45
-; GCN-NEXT: v_readlane_b32 s50, v1, 46
-; GCN-NEXT: v_readlane_b32 s51, v1, 47
-; GCN-NEXT: v_readlane_b32 s0, v1, 16
-; GCN-NEXT: v_readlane_b32 s1, v1, 17
-; GCN-NEXT: v_readlane_b32 s2, v1, 18
-; GCN-NEXT: v_readlane_b32 s3, v1, 19
-; GCN-NEXT: v_readlane_b32 s4, v1, 20
-; GCN-NEXT: v_readlane_b32 s5, v1, 21
-; GCN-NEXT: v_readlane_b32 s6, v1, 22
-; GCN-NEXT: v_readlane_b32 s7, v1, 23
-; GCN-NEXT: v_readlane_b32 s8, v1, 24
-; GCN-NEXT: v_readlane_b32 s9, v1, 25
-; GCN-NEXT: v_readlane_b32 s10, v1, 26
-; GCN-NEXT: v_readlane_b32 s11, v1, 27
-; GCN-NEXT: v_readlane_b32 s12, v1, 28
-; GCN-NEXT: v_readlane_b32 s13, v1, 29
-; GCN-NEXT: v_readlane_b32 s14, v1, 30
-; GCN-NEXT: v_readlane_b32 s15, v1, 31
-; GCN-NEXT: v_readlane_b32 s16, v1, 0
-; GCN-NEXT: v_readlane_b32 s17, v1, 1
-; GCN-NEXT: v_readlane_b32 s18, v1, 2
-; GCN-NEXT: v_readlane_b32 s19, v1, 3
-; GCN-NEXT: v_readlane_b32 s20, v1, 4
-; GCN-NEXT: v_readlane_b32 s21, v1, 5
-; GCN-NEXT: v_readlane_b32 s22, v1, 6
-; GCN-NEXT: v_readlane_b32 s23, v1, 7
-; GCN-NEXT: v_readlane_b32 s24, v1, 8
-; GCN-NEXT: v_readlane_b32 s25, v1, 9
-; GCN-NEXT: v_readlane_b32 s26, v1, 10
-; GCN-NEXT: v_readlane_b32 s27, v1, 11
-; GCN-NEXT: v_readlane_b32 s28, v1, 12
-; GCN-NEXT: v_readlane_b32 s29, v1, 13
-; GCN-NEXT: v_readlane_b32 s30, v1, 14
-; GCN-NEXT: v_readlane_b32 s31, v1, 15
+; GCN-NEXT: v_readlane_b32 s36, v31, 32
+; GCN-NEXT: v_readlane_b32 s37, v31, 33
+; GCN-NEXT: v_readlane_b32 s38, v31, 34
+; GCN-NEXT: v_readlane_b32 s39, v31, 35
+; GCN-NEXT: v_readlane_b32 s40, v31, 36
+; GCN-NEXT: v_readlane_b32 s41, v31, 37
+; GCN-NEXT: v_readlane_b32 s42, v31, 38
+; GCN-NEXT: v_readlane_b32 s43, v31, 39
+; GCN-NEXT: v_readlane_b32 s44, v31, 40
+; GCN-NEXT: v_readlane_b32 s45, v31, 41
+; GCN-NEXT: v_readlane_b32 s46, v31, 42
+; GCN-NEXT: v_readlane_b32 s47, v31, 43
+; GCN-NEXT: v_readlane_b32 s48, v31, 44
+; GCN-NEXT: v_readlane_b32 s49, v31, 45
+; GCN-NEXT: v_readlane_b32 s50, v31, 46
+; GCN-NEXT: v_readlane_b32 s51, v31, 47
+; GCN-NEXT: v_readlane_b32 s0, v31, 16
+; GCN-NEXT: v_readlane_b32 s1, v31, 17
+; GCN-NEXT: v_readlane_b32 s2, v31, 18
+; GCN-NEXT: v_readlane_b32 s3, v31, 19
+; GCN-NEXT: v_readlane_b32 s4, v31, 20
+; GCN-NEXT: v_readlane_b32 s5, v31, 21
+; GCN-NEXT: v_readlane_b32 s6, v31, 22
+; GCN-NEXT: v_readlane_b32 s7, v31, 23
+; GCN-NEXT: v_readlane_b32 s8, v31, 24
+; GCN-NEXT: v_readlane_b32 s9, v31, 25
+; GCN-NEXT: v_readlane_b32 s10, v31, 26
+; GCN-NEXT: v_readlane_b32 s11, v31, 27
+; GCN-NEXT: v_readlane_b32 s12, v31, 28
+; GCN-NEXT: v_readlane_b32 s13, v31, 29
+; GCN-NEXT: v_readlane_b32 s14, v31, 30
+; GCN-NEXT: v_readlane_b32 s15, v31, 31
+; GCN-NEXT: v_readlane_b32 s16, v31, 0
+; GCN-NEXT: v_readlane_b32 s17, v31, 1
+; GCN-NEXT: v_readlane_b32 s18, v31, 2
+; GCN-NEXT: v_readlane_b32 s19, v31, 3
+; GCN-NEXT: v_readlane_b32 s20, v31, 4
+; GCN-NEXT: v_readlane_b32 s21, v31, 5
+; GCN-NEXT: v_readlane_b32 s22, v31, 6
+; GCN-NEXT: v_readlane_b32 s23, v31, 7
+; GCN-NEXT: v_readlane_b32 s24, v31, 8
+; GCN-NEXT: v_readlane_b32 s25, v31, 9
+; GCN-NEXT: v_readlane_b32 s26, v31, 10
+; GCN-NEXT: v_readlane_b32 s27, v31, 11
+; GCN-NEXT: v_readlane_b32 s28, v31, 12
+; GCN-NEXT: v_readlane_b32 s29, v31, 13
+; GCN-NEXT: v_readlane_b32 s30, v31, 14
+; GCN-NEXT: v_readlane_b32 s31, v31, 15
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[16:31]
@@ -917,25 +889,25 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s4, v1, 48
-; GCN-NEXT: v_readlane_b32 s5, v1, 49
-; GCN-NEXT: v_readlane_b32 s6, v1, 50
-; GCN-NEXT: v_readlane_b32 s7, v1, 51
-; GCN-NEXT: v_readlane_b32 s8, v1, 52
-; GCN-NEXT: v_readlane_b32 s9, v1, 53
-; GCN-NEXT: v_readlane_b32 s10, v1, 54
-; GCN-NEXT: v_readlane_b32 s11, v1, 55
-; GCN-NEXT: v_readlane_b32 s12, v1, 56
-; GCN-NEXT: v_readlane_b32 s13, v1, 57
-; GCN-NEXT: v_readlane_b32 s14, v1, 58
-; GCN-NEXT: v_readlane_b32 s15, v1, 59
-; GCN-NEXT: v_readlane_b32 s16, v1, 60
-; GCN-NEXT: v_readlane_b32 s17, v1, 61
-; GCN-NEXT: v_readlane_b32 s18, v1, 62
-; GCN-NEXT: v_readlane_b32 s19, v1, 63
+; GCN-NEXT: v_readlane_b32 s4, v31, 48
+; GCN-NEXT: v_readlane_b32 s5, v31, 49
+; GCN-NEXT: v_readlane_b32 s6, v31, 50
+; GCN-NEXT: v_readlane_b32 s7, v31, 51
+; GCN-NEXT: v_readlane_b32 s8, v31, 52
+; GCN-NEXT: v_readlane_b32 s9, v31, 53
+; GCN-NEXT: v_readlane_b32 s10, v31, 54
+; GCN-NEXT: v_readlane_b32 s11, v31, 55
+; GCN-NEXT: v_readlane_b32 s12, v31, 56
+; GCN-NEXT: v_readlane_b32 s13, v31, 57
+; GCN-NEXT: v_readlane_b32 s14, v31, 58
+; GCN-NEXT: v_readlane_b32 s15, v31, 59
+; GCN-NEXT: v_readlane_b32 s16, v31, 60
+; GCN-NEXT: v_readlane_b32 s17, v31, 61
+; GCN-NEXT: v_readlane_b32 s18, v31, 62
+; GCN-NEXT: v_readlane_b32 s19, v31, 63
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s0, v0, 0
-; GCN-NEXT: v_readlane_b32 s1, v0, 1
+; GCN-NEXT: v_readlane_b32 s0, v32, 0
+; GCN-NEXT: v_readlane_b32 s1, v32, 1
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[36:51]
; GCN-NEXT: ;;#ASMEND
@@ -946,14 +918,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %
; GCN-NEXT: ; use s[0:1]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB2_2: ; %ret
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: ; kill: killed $vgpr1
-; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
@@ -993,17 +957,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: s_mov_b32 s55, 0xe8f000
; GCN-NEXT: s_add_u32 s52, s52, s9
; GCN-NEXT: s_addc_u32 s53, s53, 0
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s0, s[2:3], 0x9
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
@@ -1017,91 +973,91 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_writelane_b32 v1, s4, 0
-; GCN-NEXT: v_writelane_b32 v1, s5, 1
-; GCN-NEXT: v_writelane_b32 v1, s6, 2
-; GCN-NEXT: v_writelane_b32 v1, s7, 3
-; GCN-NEXT: v_writelane_b32 v1, s8, 4
-; GCN-NEXT: v_writelane_b32 v1, s9, 5
-; GCN-NEXT: v_writelane_b32 v1, s10, 6
-; GCN-NEXT: v_writelane_b32 v1, s11, 7
-; GCN-NEXT: v_writelane_b32 v1, s12, 8
-; GCN-NEXT: v_writelane_b32 v1, s13, 9
-; GCN-NEXT: v_writelane_b32 v1, s14, 10
-; GCN-NEXT: v_writelane_b32 v1, s15, 11
-; GCN-NEXT: v_writelane_b32 v1, s16, 12
-; GCN-NEXT: v_writelane_b32 v1, s17, 13
-; GCN-NEXT: v_writelane_b32 v1, s18, 14
-; GCN-NEXT: v_writelane_b32 v1, s19, 15
+; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v32, s4, 0
+; GCN-NEXT: v_writelane_b32 v32, s5, 1
+; GCN-NEXT: v_writelane_b32 v32, s6, 2
+; GCN-NEXT: v_writelane_b32 v32, s7, 3
+; GCN-NEXT: v_writelane_b32 v32, s8, 4
+; GCN-NEXT: v_writelane_b32 v32, s9, 5
+; GCN-NEXT: v_writelane_b32 v32, s10, 6
+; GCN-NEXT: v_writelane_b32 v32, s11, 7
+; GCN-NEXT: v_writelane_b32 v32, s12, 8
+; GCN-NEXT: v_writelane_b32 v32, s13, 9
+; GCN-NEXT: v_writelane_b32 v32, s14, 10
+; GCN-NEXT: v_writelane_b32 v32, s15, 11
+; GCN-NEXT: v_writelane_b32 v32, s16, 12
+; GCN-NEXT: v_writelane_b32 v32, s17, 13
+; GCN-NEXT: v_writelane_b32 v32, s18, 14
+; GCN-NEXT: v_writelane_b32 v32, s19, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 16
-; GCN-NEXT: v_writelane_b32 v1, s5, 17
-; GCN-NEXT: v_writelane_b32 v1, s6, 18
-; GCN-NEXT: v_writelane_b32 v1, s7, 19
-; GCN-NEXT: v_writelane_b32 v1, s8, 20
-; GCN-NEXT: v_writelane_b32 v1, s9, 21
-; GCN-NEXT: v_writelane_b32 v1, s10, 22
-; GCN-NEXT: v_writelane_b32 v1, s11, 23
-; GCN-NEXT: v_writelane_b32 v1, s12, 24
-; GCN-NEXT: v_writelane_b32 v1, s13, 25
-; GCN-NEXT: v_writelane_b32 v1, s14, 26
-; GCN-NEXT: v_writelane_b32 v1, s15, 27
-; GCN-NEXT: v_writelane_b32 v1, s16, 28
-; GCN-NEXT: v_writelane_b32 v1, s17, 29
-; GCN-NEXT: v_writelane_b32 v1, s18, 30
-; GCN-NEXT: v_writelane_b32 v1, s19, 31
+; GCN-NEXT: v_writelane_b32 v32, s4, 16
+; GCN-NEXT: v_writelane_b32 v32, s5, 17
+; GCN-NEXT: v_writelane_b32 v32, s6, 18
+; GCN-NEXT: v_writelane_b32 v32, s7, 19
+; GCN-NEXT: v_writelane_b32 v32, s8, 20
+; GCN-NEXT: v_writelane_b32 v32, s9, 21
+; GCN-NEXT: v_writelane_b32 v32, s10, 22
+; GCN-NEXT: v_writelane_b32 v32, s11, 23
+; GCN-NEXT: v_writelane_b32 v32, s12, 24
+; GCN-NEXT: v_writelane_b32 v32, s13, 25
+; GCN-NEXT: v_writelane_b32 v32, s14, 26
+; GCN-NEXT: v_writelane_b32 v32, s15, 27
+; GCN-NEXT: v_writelane_b32 v32, s16, 28
+; GCN-NEXT: v_writelane_b32 v32, s17, 29
+; GCN-NEXT: v_writelane_b32 v32, s18, 30
+; GCN-NEXT: v_writelane_b32 v32, s19, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 32
-; GCN-NEXT: v_writelane_b32 v1, s5, 33
-; GCN-NEXT: v_writelane_b32 v1, s6, 34
-; GCN-NEXT: v_writelane_b32 v1, s7, 35
-; GCN-NEXT: v_writelane_b32 v1, s8, 36
-; GCN-NEXT: v_writelane_b32 v1, s9, 37
-; GCN-NEXT: v_writelane_b32 v1, s10, 38
-; GCN-NEXT: v_writelane_b32 v1, s11, 39
-; GCN-NEXT: v_writelane_b32 v1, s12, 40
-; GCN-NEXT: v_writelane_b32 v1, s13, 41
-; GCN-NEXT: v_writelane_b32 v1, s14, 42
-; GCN-NEXT: v_writelane_b32 v1, s15, 43
-; GCN-NEXT: v_writelane_b32 v1, s16, 44
-; GCN-NEXT: v_writelane_b32 v1, s17, 45
-; GCN-NEXT: v_writelane_b32 v1, s18, 46
-; GCN-NEXT: v_writelane_b32 v1, s19, 47
+; GCN-NEXT: v_writelane_b32 v32, s4, 32
+; GCN-NEXT: v_writelane_b32 v32, s5, 33
+; GCN-NEXT: v_writelane_b32 v32, s6, 34
+; GCN-NEXT: v_writelane_b32 v32, s7, 35
+; GCN-NEXT: v_writelane_b32 v32, s8, 36
+; GCN-NEXT: v_writelane_b32 v32, s9, 37
+; GCN-NEXT: v_writelane_b32 v32, s10, 38
+; GCN-NEXT: v_writelane_b32 v32, s11, 39
+; GCN-NEXT: v_writelane_b32 v32, s12, 40
+; GCN-NEXT: v_writelane_b32 v32, s13, 41
+; GCN-NEXT: v_writelane_b32 v32, s14, 42
+; GCN-NEXT: v_writelane_b32 v32, s15, 43
+; GCN-NEXT: v_writelane_b32 v32, s16, 44
+; GCN-NEXT: v_writelane_b32 v32, s17, 45
+; GCN-NEXT: v_writelane_b32 v32, s18, 46
+; GCN-NEXT: v_writelane_b32 v32, s19, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s4, 48
-; GCN-NEXT: v_writelane_b32 v1, s5, 49
-; GCN-NEXT: v_writelane_b32 v1, s6, 50
-; GCN-NEXT: v_writelane_b32 v1, s7, 51
-; GCN-NEXT: v_writelane_b32 v1, s8, 52
-; GCN-NEXT: v_writelane_b32 v1, s9, 53
-; GCN-NEXT: v_writelane_b32 v1, s10, 54
-; GCN-NEXT: v_writelane_b32 v1, s11, 55
-; GCN-NEXT: v_writelane_b32 v1, s12, 56
-; GCN-NEXT: v_writelane_b32 v1, s13, 57
-; GCN-NEXT: v_writelane_b32 v1, s14, 58
-; GCN-NEXT: v_writelane_b32 v1, s15, 59
-; GCN-NEXT: v_writelane_b32 v1, s16, 60
-; GCN-NEXT: v_writelane_b32 v1, s17, 61
-; GCN-NEXT: v_writelane_b32 v1, s18, 62
-; GCN-NEXT: v_writelane_b32 v1, s19, 63
+; GCN-NEXT: v_writelane_b32 v32, s4, 48
+; GCN-NEXT: v_writelane_b32 v32, s5, 49
+; GCN-NEXT: v_writelane_b32 v32, s6, 50
+; GCN-NEXT: v_writelane_b32 v32, s7, 51
+; GCN-NEXT: v_writelane_b32 v32, s8, 52
+; GCN-NEXT: v_writelane_b32 v32, s9, 53
+; GCN-NEXT: v_writelane_b32 v32, s10, 54
+; GCN-NEXT: v_writelane_b32 v32, s11, 55
+; GCN-NEXT: v_writelane_b32 v32, s12, 56
+; GCN-NEXT: v_writelane_b32 v32, s13, 57
+; GCN-NEXT: v_writelane_b32 v32, s14, 58
+; GCN-NEXT: v_writelane_b32 v32, s15, 59
+; GCN-NEXT: v_writelane_b32 v32, s16, 60
+; GCN-NEXT: v_writelane_b32 v32, s17, 61
+; GCN-NEXT: v_writelane_b32 v32, s18, 62
+; GCN-NEXT: v_writelane_b32 v32, s19, 63
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[2:3]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_writelane_b32 v0, s2, 0
-; GCN-NEXT: v_writelane_b32 v0, s3, 1
+; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v32, s2, 0
+; GCN-NEXT: v_writelane_b32 v32, s3, 1
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_mov_b32 s1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -1109,59 +1065,59 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: s_cbranch_scc1 .LBB3_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s36, v2, 32
-; GCN-NEXT: v_readlane_b32 s37, v2, 33
-; GCN-NEXT: v_readlane_b32 s38, v2, 34
-; GCN-NEXT: v_readlane_b32 s39, v2, 35
-; GCN-NEXT: v_readlane_b32 s40, v2, 36
-; GCN-NEXT: v_readlane_b32 s41, v2, 37
-; GCN-NEXT: v_readlane_b32 s42, v2, 38
-; GCN-NEXT: v_readlane_b32 s43, v2, 39
-; GCN-NEXT: v_readlane_b32 s44, v2, 40
-; GCN-NEXT: v_readlane_b32 s45, v2, 41
-; GCN-NEXT: v_readlane_b32 s46, v2, 42
-; GCN-NEXT: v_readlane_b32 s47, v2, 43
-; GCN-NEXT: v_readlane_b32 s48, v2, 44
-; GCN-NEXT: v_readlane_b32 s49, v2, 45
-; GCN-NEXT: v_readlane_b32 s50, v2, 46
-; GCN-NEXT: v_readlane_b32 s51, v2, 47
-; GCN-NEXT: v_readlane_b32 s0, v2, 16
-; GCN-NEXT: v_readlane_b32 s1, v2, 17
-; GCN-NEXT: v_readlane_b32 s2, v2, 18
-; GCN-NEXT: v_readlane_b32 s3, v2, 19
-; GCN-NEXT: v_readlane_b32 s4, v2, 20
-; GCN-NEXT: v_readlane_b32 s5, v2, 21
-; GCN-NEXT: v_readlane_b32 s6, v2, 22
-; GCN-NEXT: v_readlane_b32 s7, v2, 23
-; GCN-NEXT: v_readlane_b32 s8, v2, 24
-; GCN-NEXT: v_readlane_b32 s9, v2, 25
-; GCN-NEXT: v_readlane_b32 s10, v2, 26
-; GCN-NEXT: v_readlane_b32 s11, v2, 27
-; GCN-NEXT: v_readlane_b32 s12, v2, 28
-; GCN-NEXT: v_readlane_b32 s13, v2, 29
-; GCN-NEXT: v_readlane_b32 s14, v2, 30
-; GCN-NEXT: v_readlane_b32 s15, v2, 31
-; GCN-NEXT: v_readlane_b32 s16, v2, 0
-; GCN-NEXT: v_readlane_b32 s17, v2, 1
-; GCN-NEXT: v_readlane_b32 s18, v2, 2
-; GCN-NEXT: v_readlane_b32 s19, v2, 3
-; GCN-NEXT: v_readlane_b32 s20, v2, 4
-; GCN-NEXT: v_readlane_b32 s21, v2, 5
-; GCN-NEXT: v_readlane_b32 s22, v2, 6
-; GCN-NEXT: v_readlane_b32 s23, v2, 7
-; GCN-NEXT: v_readlane_b32 s24, v2, 8
-; GCN-NEXT: v_readlane_b32 s25, v2, 9
-; GCN-NEXT: v_readlane_b32 s26, v2, 10
-; GCN-NEXT: v_readlane_b32 s27, v2, 11
-; GCN-NEXT: v_readlane_b32 s28, v2, 12
-; GCN-NEXT: v_readlane_b32 s29, v2, 13
-; GCN-NEXT: v_readlane_b32 s30, v2, 14
-; GCN-NEXT: v_readlane_b32 s31, v2, 15
+; GCN-NEXT: v_readlane_b32 s36, v31, 32
+; GCN-NEXT: v_readlane_b32 s37, v31, 33
+; GCN-NEXT: v_readlane_b32 s38, v31, 34
+; GCN-NEXT: v_readlane_b32 s39, v31, 35
+; GCN-NEXT: v_readlane_b32 s40, v31, 36
+; GCN-NEXT: v_readlane_b32 s41, v31, 37
+; GCN-NEXT: v_readlane_b32 s42, v31, 38
+; GCN-NEXT: v_readlane_b32 s43, v31, 39
+; GCN-NEXT: v_readlane_b32 s44, v31, 40
+; GCN-NEXT: v_readlane_b32 s45, v31, 41
+; GCN-NEXT: v_readlane_b32 s46, v31, 42
+; GCN-NEXT: v_readlane_b32 s47, v31, 43
+; GCN-NEXT: v_readlane_b32 s48, v31, 44
+; GCN-NEXT: v_readlane_b32 s49, v31, 45
+; GCN-NEXT: v_readlane_b32 s50, v31, 46
+; GCN-NEXT: v_readlane_b32 s51, v31, 47
+; GCN-NEXT: v_readlane_b32 s0, v31, 16
+; GCN-NEXT: v_readlane_b32 s1, v31, 17
+; GCN-NEXT: v_readlane_b32 s2, v31, 18
+; GCN-NEXT: v_readlane_b32 s3, v31, 19
+; GCN-NEXT: v_readlane_b32 s4, v31, 20
+; GCN-NEXT: v_readlane_b32 s5, v31, 21
+; GCN-NEXT: v_readlane_b32 s6, v31, 22
+; GCN-NEXT: v_readlane_b32 s7, v31, 23
+; GCN-NEXT: v_readlane_b32 s8, v31, 24
+; GCN-NEXT: v_readlane_b32 s9, v31, 25
+; GCN-NEXT: v_readlane_b32 s10, v31, 26
+; GCN-NEXT: v_readlane_b32 s11, v31, 27
+; GCN-NEXT: v_readlane_b32 s12, v31, 28
+; GCN-NEXT: v_readlane_b32 s13, v31, 29
+; GCN-NEXT: v_readlane_b32 s14, v31, 30
+; GCN-NEXT: v_readlane_b32 s15, v31, 31
+; GCN-NEXT: v_readlane_b32 s16, v31, 0
+; GCN-NEXT: v_readlane_b32 s17, v31, 1
+; GCN-NEXT: v_readlane_b32 s18, v31, 2
+; GCN-NEXT: v_readlane_b32 s19, v31, 3
+; GCN-NEXT: v_readlane_b32 s20, v31, 4
+; GCN-NEXT: v_readlane_b32 s21, v31, 5
+; GCN-NEXT: v_readlane_b32 s22, v31, 6
+; GCN-NEXT: v_readlane_b32 s23, v31, 7
+; GCN-NEXT: v_readlane_b32 s24, v31, 8
+; GCN-NEXT: v_readlane_b32 s25, v31, 9
+; GCN-NEXT: v_readlane_b32 s26, v31, 10
+; GCN-NEXT: v_readlane_b32 s27, v31, 11
+; GCN-NEXT: v_readlane_b32 s28, v31, 12
+; GCN-NEXT: v_readlane_b32 s29, v31, 13
+; GCN-NEXT: v_readlane_b32 s30, v31, 14
+; GCN-NEXT: v_readlane_b32 s31, v31, 15
; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def v0
@@ -1172,25 +1128,25 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[0:15]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s4, v2, 48
-; GCN-NEXT: v_readlane_b32 s5, v2, 49
-; GCN-NEXT: v_readlane_b32 s6, v2, 50
-; GCN-NEXT: v_readlane_b32 s7, v2, 51
-; GCN-NEXT: v_readlane_b32 s8, v2, 52
-; GCN-NEXT: v_readlane_b32 s9, v2, 53
-; GCN-NEXT: v_readlane_b32 s10, v2, 54
-; GCN-NEXT: v_readlane_b32 s11, v2, 55
-; GCN-NEXT: v_readlane_b32 s12, v2, 56
-; GCN-NEXT: v_readlane_b32 s13, v2, 57
-; GCN-NEXT: v_readlane_b32 s14, v2, 58
-; GCN-NEXT: v_readlane_b32 s15, v2, 59
-; GCN-NEXT: v_readlane_b32 s16, v2, 60
-; GCN-NEXT: v_readlane_b32 s17, v2, 61
-; GCN-NEXT: v_readlane_b32 s18, v2, 62
-; GCN-NEXT: v_readlane_b32 s19, v2, 63
+; GCN-NEXT: v_readlane_b32 s4, v31, 48
+; GCN-NEXT: v_readlane_b32 s5, v31, 49
+; GCN-NEXT: v_readlane_b32 s6, v31, 50
+; GCN-NEXT: v_readlane_b32 s7, v31, 51
+; GCN-NEXT: v_readlane_b32 s8, v31, 52
+; GCN-NEXT: v_readlane_b32 s9, v31, 53
+; GCN-NEXT: v_readlane_b32 s10, v31, 54
+; GCN-NEXT: v_readlane_b32 s11, v31, 55
+; GCN-NEXT: v_readlane_b32 s12, v31, 56
+; GCN-NEXT: v_readlane_b32 s13, v31, 57
+; GCN-NEXT: v_readlane_b32 s14, v31, 58
+; GCN-NEXT: v_readlane_b32 s15, v31, 59
+; GCN-NEXT: v_readlane_b32 s16, v31, 60
+; GCN-NEXT: v_readlane_b32 s17, v31, 61
+; GCN-NEXT: v_readlane_b32 s18, v31, 62
+; GCN-NEXT: v_readlane_b32 s19, v31, 63
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s0, v1, 0
-; GCN-NEXT: v_readlane_b32 s1, v1, 1
+; GCN-NEXT: v_readlane_b32 s0, v32, 0
+; GCN-NEXT: v_readlane_b32 s1, v32, 1
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[36:51]
; GCN-NEXT: ;;#ASMEND
@@ -1204,14 +1160,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 {
; GCN-NEXT: ; use v0
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB3_2: ; %ret
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[34:35]
-; GCN-NEXT: ; kill: killed $vgpr1
-; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
@@ -1243,7 +1191,7 @@ ret:
}
attributes #0 = { nounwind }
-attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="7,7" }
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
index 8e2a56b463c401..fa62048fd31adf 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
@@ -61,35 +61,27 @@ machineFunctionInfo:
isChainFunction: true
returnsVoid: true
wwmReservedRegs:
- - '$vgpr11'
+ - '$vgpr10'
body: |
bb.0:
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: preserve_all_lanes_wwm_above_args
- ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
+ ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr11, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
- ; GCN-NEXT: renamable $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
- ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
- ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
- ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
+ ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
+ ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 10, implicit $exec
+ ; GCN-NEXT: $vgpr8 = COPY killed $vgpr0
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
- renamable $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
+ $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0
- renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
- $vgpr8 = COPY renamable killed $vgpr10
+ $vgpr10 = V_MOV_B32_e32 10, implicit $exec
+ $vgpr8 = COPY killed $vgpr10
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
@@ -139,23 +131,15 @@ body: |
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
; GCN-LABEL: name: preserve_inactive_lanes_wwm_args
- ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
+ ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr10
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
- ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
+ ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
- ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
+ ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr0
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
$sgpr35 = S_MOV_B32 5
$sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
@@ -184,7 +168,7 @@ body: |
; GCN-LABEL: name: dont_preserve_if_no_chain_calls
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
+ ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
@@ -218,7 +202,7 @@ body: |
; GCN-LABEL: name: dont_preserve_v0_v7
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
index 4b8b71a7400852..49001a2cfd7a65 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
@@ -36,19 +36,11 @@ body: |
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-LABEL: name: preserve_inactive_wwm
- ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
+ ; GCN: liveins: $sgpr0, $sgpr35
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
- ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
+ ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
@@ -72,24 +64,16 @@ body: |
; GCN-LABEL: name: preserve_inactive_detected_wwm
; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
- ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
+ ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
- ; GCN-NEXT: renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
+ ; GCN-NEXT: $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0
; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
$sgpr35 = S_MOV_B32 5
@@ -122,7 +106,7 @@ body: |
; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls
; GCN: liveins: $sgpr35, $vgpr8
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
+ ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0
; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec
@@ -151,11 +135,11 @@ body: |
liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
; GCN-LABEL: name: dont_preserve_wwm_if_init_whole_wave
- ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
+ ; GCN: liveins: $sgpr0, $sgpr35
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
- ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
+ ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1
renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
@@ -209,7 +193,7 @@ body: |
; GCN-LABEL: name: dont_preserve_v0_v7
; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/pr51516.mir b/llvm/test/CodeGen/AMDGPU/pr51516.mir
index b21285e83dc21d..4be102f7860eab 100644
--- a/llvm/test/CodeGen/AMDGPU/pr51516.mir
+++ b/llvm/test/CodeGen/AMDGPU/pr51516.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,2 -o - %s | FileCheck -check-prefix=GCN %s
# Check that %3 was not rematerialized before the last store since its operand %1
# is killed by that store.
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
index 4571e792c7cb58..168d63d3a95b96 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
+++ b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir
@@ -20,16 +20,10 @@ body: |
; GCN-LABEL: name: preserve_scratch_vgpr_inactive_lanes
; GCN: liveins: $sgpr35, $vgpr0, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
; GCN-NEXT: $sgpr35 = S_MOV_B32 5
; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec
- ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GCN-NEXT: S_SETPC_B64_return killed renamable $sgpr30_sgpr31, implicit $vgpr0
renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0
$sgpr35 = S_MOV_B32 5
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index bbeb2e1884a9ff..924340ec8a2a6a 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -13,333 +13,333 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX906-NEXT: s_mov_b32 s16, s33
; GFX906-NEXT: s_mov_b32 s33, s32
; GFX906-NEXT: s_xor_saveexec_b64 s[18:19], -1
-; GFX906-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX906-NEXT: s_mov_b64 exec, -1
-; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
; GFX906-NEXT: s_mov_b64 exec, s[18:19]
-; GFX906-NEXT: ; implicit-def: $vgpr2
+; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
; GFX906-NEXT: s_mov_b32 s21, s15
-; GFX906-NEXT: v_writelane_b32 v2, s6, 0
-; GFX906-NEXT: v_writelane_b32 v2, s7, 1
-; GFX906-NEXT: v_writelane_b32 v2, s21, 2
+; GFX906-NEXT: v_writelane_b32 v39, s6, 0
+; GFX906-NEXT: v_writelane_b32 v39, s7, 1
+; GFX906-NEXT: v_writelane_b32 v39, s21, 2
; GFX906-NEXT: s_mov_b32 s22, s14
-; GFX906-NEXT: v_writelane_b32 v2, s22, 3
+; GFX906-NEXT: v_writelane_b32 v39, s22, 3
; GFX906-NEXT: s_mov_b32 s23, s13
-; GFX906-NEXT: v_writelane_b32 v2, s23, 4
+; GFX906-NEXT: v_writelane_b32 v39, s23, 4
; GFX906-NEXT: s_mov_b32 s24, s12
-; GFX906-NEXT: v_writelane_b32 v2, s24, 5
+; GFX906-NEXT: v_writelane_b32 v39, s24, 5
; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11]
-; GFX906-NEXT: v_writelane_b32 v2, s26, 6
+; GFX906-NEXT: v_writelane_b32 v39, s26, 6
; GFX906-NEXT: v_writelane_b32 v41, s16, 4
-; GFX906-NEXT: v_writelane_b32 v2, s27, 7
+; GFX906-NEXT: v_writelane_b32 v39, s27, 7
; GFX906-NEXT: v_writelane_b32 v41, s34, 2
-; GFX906-NEXT: v_writelane_b32 v2, s8, 8
+; GFX906-NEXT: v_writelane_b32 v39, s8, 8
; GFX906-NEXT: v_writelane_b32 v41, s35, 3
-; GFX906-NEXT: v_writelane_b32 v2, s9, 9
+; GFX906-NEXT: v_writelane_b32 v39, s9, 9
; GFX906-NEXT: v_writelane_b32 v41, s30, 0
-; GFX906-NEXT: v_writelane_b32 v2, s4, 10
+; GFX906-NEXT: v_writelane_b32 v39, s4, 10
; GFX906-NEXT: s_addk_i32 s32, 0x2800
+; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX906-NEXT: v_writelane_b32 v41, s31, 1
; GFX906-NEXT: v_mov_b32_e32 v32, v31
-; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX906-NEXT: s_nop 0
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GFX906-NEXT: v_writelane_b32 v2, s5, 11
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GFX906-NEXT: v_writelane_b32 v39, s5, 11
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX906-NEXT: v_mov_b32_e32 v33, v2
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def v[0:31]
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX906-NEXT: s_nop 0
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
-; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def v40
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s11
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX906-NEXT: v_mov_b32_e32 v40, v33
-; GFX906-NEXT: s_mov_b64 exec, s[34:35]
-; GFX906-NEXT: v_writelane_b32 v40, s11, 12
+; GFX906-NEXT: v_writelane_b32 v39, s11, 12
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s12
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s12, 13
+; GFX906-NEXT: v_writelane_b32 v39, s12, 13
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s13
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s13, 14
+; GFX906-NEXT: v_writelane_b32 v39, s13, 14
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s14
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s14, 15
+; GFX906-NEXT: v_writelane_b32 v39, s14, 15
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s15
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s15, 16
+; GFX906-NEXT: v_writelane_b32 v39, s15, 16
; GFX906-NEXT: s_getpc_b64 s[10:11]
; GFX906-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4
; GFX906-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s16
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s16, 17
+; GFX906-NEXT: v_writelane_b32 v39, s16, 17
; GFX906-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s17
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s17, 18
+; GFX906-NEXT: v_writelane_b32 v39, s17, 18
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s18
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s18, 19
+; GFX906-NEXT: v_writelane_b32 v39, s18, 19
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s19
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s19, 20
+; GFX906-NEXT: v_writelane_b32 v39, s19, 20
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s20
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_writelane_b32 v40, s20, 21
+; GFX906-NEXT: v_writelane_b32 v39, s20, 21
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
-; GFX906-NEXT: v_writelane_b32 v40, s10, 22
-; GFX906-NEXT: v_writelane_b32 v40, s11, 23
+; GFX906-NEXT: v_writelane_b32 v39, s10, 22
+; GFX906-NEXT: v_writelane_b32 v39, s11, 23
+; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT: s_mov_b64 exec, s[34:35]
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
-; GFX906-NEXT: v_readlane_b32 s16, v40, 22
+; GFX906-NEXT: v_readlane_b32 s16, v39, 22
; GFX906-NEXT: s_mov_b32 s12, s24
; GFX906-NEXT: s_mov_b32 s13, s23
; GFX906-NEXT: s_mov_b32 s14, s22
; GFX906-NEXT: v_mov_b32_e32 v31, v32
; GFX906-NEXT: s_mov_b32 s15, s21
; GFX906-NEXT: s_mov_b64 s[10:11], s[26:27]
-; GFX906-NEXT: v_readlane_b32 s17, v40, 23
-; GFX906-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX906-NEXT: v_readlane_b32 s17, v39, 23
+; GFX906-NEXT: v_mov_b32_e32 v40, v32
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
-; GFX906-NEXT: v_readlane_b32 s11, v40, 12
+; GFX906-NEXT: s_waitcnt vmcnt(0)
+; GFX906-NEXT: v_readlane_b32 s11, v39, 12
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s11
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s12, v40, 13
+; GFX906-NEXT: v_readlane_b32 s12, v39, 13
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s12
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s13, v40, 14
+; GFX906-NEXT: v_readlane_b32 s13, v39, 14
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s13
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s14, v40, 15
+; GFX906-NEXT: v_readlane_b32 s14, v39, 15
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s14
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s15, v40, 16
+; GFX906-NEXT: v_readlane_b32 s15, v39, 16
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s15
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s16, v40, 17
+; GFX906-NEXT: v_readlane_b32 s16, v39, 17
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s16
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s17, v40, 18
+; GFX906-NEXT: v_readlane_b32 s17, v39, 18
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s17
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s18, v40, 19
+; GFX906-NEXT: v_readlane_b32 s18, v39, 19
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s18
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s19, v40, 20
+; GFX906-NEXT: v_readlane_b32 s19, v39, 20
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s19
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s20, v40, 21
+; GFX906-NEXT: v_readlane_b32 s20, v39, 21
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s20
; GFX906-NEXT: ;;#ASMEND
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s21
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s21, 24
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s22
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s22, 25
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s23
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s23, 26
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s24
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s24, 27
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s25
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s25, 28
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s26
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s26, 29
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s27
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s27, 30
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s28
; GFX906-NEXT: ;;#ASMEND
+; GFX906-NEXT: v_writelane_b32 v39, s28, 31
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; def s29
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX906-NEXT: v_writelane_b32 v40, s21, 24
-; GFX906-NEXT: v_writelane_b32 v40, s22, 25
-; GFX906-NEXT: v_writelane_b32 v40, s23, 26
-; GFX906-NEXT: v_writelane_b32 v40, s24, 27
-; GFX906-NEXT: v_writelane_b32 v40, s25, 28
-; GFX906-NEXT: v_writelane_b32 v40, s26, 29
-; GFX906-NEXT: v_writelane_b32 v40, s27, 30
-; GFX906-NEXT: v_writelane_b32 v40, s28, 31
-; GFX906-NEXT: v_writelane_b32 v40, s29, 32
-; GFX906-NEXT: v_readlane_b32 s4, v40, 10
-; GFX906-NEXT: v_readlane_b32 s6, v40, 0
-; GFX906-NEXT: v_readlane_b32 s8, v40, 8
-; GFX906-NEXT: v_readlane_b32 s10, v40, 6
-; GFX906-NEXT: v_readlane_b32 s16, v40, 22
-; GFX906-NEXT: v_readlane_b32 s12, v40, 5
-; GFX906-NEXT: v_readlane_b32 s13, v40, 4
-; GFX906-NEXT: v_readlane_b32 s14, v40, 3
-; GFX906-NEXT: v_readlane_b32 s15, v40, 2
-; GFX906-NEXT: v_readlane_b32 s5, v40, 11
-; GFX906-NEXT: v_readlane_b32 s7, v40, 1
-; GFX906-NEXT: v_readlane_b32 s9, v40, 9
-; GFX906-NEXT: v_readlane_b32 s11, v40, 7
-; GFX906-NEXT: v_readlane_b32 s17, v40, 23
+; GFX906-NEXT: v_writelane_b32 v39, s29, 32
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
+; GFX906-NEXT: v_readlane_b32 s4, v39, 10
+; GFX906-NEXT: v_readlane_b32 s6, v39, 0
+; GFX906-NEXT: v_readlane_b32 s8, v39, 8
+; GFX906-NEXT: v_readlane_b32 s10, v39, 6
+; GFX906-NEXT: v_readlane_b32 s16, v39, 22
+; GFX906-NEXT: v_readlane_b32 s12, v39, 5
+; GFX906-NEXT: v_mov_b32_e32 v31, v40
+; GFX906-NEXT: v_readlane_b32 s13, v39, 4
+; GFX906-NEXT: v_readlane_b32 s14, v39, 3
+; GFX906-NEXT: v_readlane_b32 s15, v39, 2
+; GFX906-NEXT: v_readlane_b32 s5, v39, 11
+; GFX906-NEXT: v_readlane_b32 s7, v39, 1
+; GFX906-NEXT: v_readlane_b32 s9, v39, 9
+; GFX906-NEXT: v_readlane_b32 s11, v39, 7
+; GFX906-NEXT: v_readlane_b32 s17, v39, 23
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
-; GFX906-NEXT: v_readlane_b32 s21, v40, 24
+; GFX906-NEXT: s_waitcnt vmcnt(0)
+; GFX906-NEXT: v_readlane_b32 s4, v39, 10
+; GFX906-NEXT: v_readlane_b32 s6, v39, 0
+; GFX906-NEXT: v_readlane_b32 s8, v39, 8
+; GFX906-NEXT: v_readlane_b32 s10, v39, 6
+; GFX906-NEXT: v_readlane_b32 s16, v39, 22
+; GFX906-NEXT: v_readlane_b32 s5, v39, 11
+; GFX906-NEXT: v_readlane_b32 s7, v39, 1
+; GFX906-NEXT: v_readlane_b32 s9, v39, 9
+; GFX906-NEXT: v_readlane_b32 s11, v39, 7
+; GFX906-NEXT: v_readlane_b32 s12, v39, 5
+; GFX906-NEXT: v_readlane_b32 s13, v39, 4
+; GFX906-NEXT: v_readlane_b32 s14, v39, 3
+; GFX906-NEXT: v_readlane_b32 s15, v39, 2
+; GFX906-NEXT: v_mov_b32_e32 v31, v40
+; GFX906-NEXT: v_readlane_b32 s17, v39, 23
+; GFX906-NEXT: v_readlane_b32 s21, v39, 24
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s21
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s22, v40, 25
+; GFX906-NEXT: v_readlane_b32 s22, v39, 25
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s22
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s23, v40, 26
+; GFX906-NEXT: v_readlane_b32 s23, v39, 26
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s23
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s24, v40, 27
+; GFX906-NEXT: v_readlane_b32 s24, v39, 27
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s24
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s25, v40, 28
+; GFX906-NEXT: v_readlane_b32 s25, v39, 28
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s25
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s26, v40, 29
+; GFX906-NEXT: v_readlane_b32 s26, v39, 29
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s26
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s27, v40, 30
+; GFX906-NEXT: v_readlane_b32 s27, v39, 30
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s27
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s28, v40, 31
+; GFX906-NEXT: v_readlane_b32 s28, v39, 31
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s28
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: v_readlane_b32 s29, v40, 32
+; GFX906-NEXT: v_readlane_b32 s29, v39, 32
; GFX906-NEXT: ;;#ASMSTART
; GFX906-NEXT: ; use s29
; GFX906-NEXT: ;;#ASMEND
-; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX906-NEXT: v_readlane_b32 s4, v40, 10
-; GFX906-NEXT: v_readlane_b32 s6, v40, 0
-; GFX906-NEXT: v_readlane_b32 s8, v40, 8
-; GFX906-NEXT: v_readlane_b32 s10, v40, 6
-; GFX906-NEXT: v_readlane_b32 s16, v40, 22
-; GFX906-NEXT: v_readlane_b32 s5, v40, 11
-; GFX906-NEXT: v_readlane_b32 s7, v40, 1
-; GFX906-NEXT: v_readlane_b32 s9, v40, 9
-; GFX906-NEXT: v_readlane_b32 s11, v40, 7
-; GFX906-NEXT: v_readlane_b32 s12, v40, 5
-; GFX906-NEXT: v_readlane_b32 s13, v40, 4
-; GFX906-NEXT: v_readlane_b32 s14, v40, 3
-; GFX906-NEXT: v_readlane_b32 s15, v40, 2
-; GFX906-NEXT: v_readlane_b32 s17, v40, 23
-; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX906-NEXT: s_mov_b64 exec, s[34:35]
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
; GFX906-NEXT: v_readlane_b32 s31, v41, 1
; GFX906-NEXT: v_readlane_b32 s30, v41, 0
-; GFX906-NEXT: ; kill: killed $vgpr40
; GFX906-NEXT: v_readlane_b32 s4, v41, 4
; GFX906-NEXT: v_readlane_b32 s34, v41, 2
; GFX906-NEXT: v_readlane_b32 s35, v41, 3
@@ -360,12 +360,11 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GFX906-NEXT: s_waitcnt vmcnt(0)
+; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX906-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX906-NEXT: s_mov_b64 exec, -1
-; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
; GFX906-NEXT: s_mov_b64 exec, s[6:7]
; GFX906-NEXT: s_addk_i32 s32, 0xd800
; GFX906-NEXT: s_mov_b32 s33, s4
@@ -378,346 +377,346 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: s_mov_b32 s16, s33
; GFX908-NEXT: s_mov_b32 s33, s32
; GFX908-NEXT: s_xor_saveexec_b64 s[18:19], -1
-; GFX908-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
-; GFX908-NEXT: s_mov_b64 exec, -1
-; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX908-NEXT: s_mov_b64 exec, s[18:19]
-; GFX908-NEXT: v_mov_b32_e32 v3, s16
-; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill
-; GFX908-NEXT: v_mov_b32_e32 v3, s34
-; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
-; GFX908-NEXT: v_mov_b32_e32 v3, s35
-; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GFX908-NEXT: v_mov_b32_e32 v2, s16
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill
+; GFX908-NEXT: v_mov_b32_e32 v2, s34
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill
+; GFX908-NEXT: v_mov_b32_e32 v2, s35
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill
; GFX908-NEXT: s_addk_i32 s32, 0x2c00
+; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
-; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s30, 0
-; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
-; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
-; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
-; GFX908-NEXT: ; implicit-def: $vgpr2
+; GFX908-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
; GFX908-NEXT: s_mov_b32 s21, s15
-; GFX908-NEXT: v_writelane_b32 v2, s6, 0
-; GFX908-NEXT: v_writelane_b32 v2, s7, 1
-; GFX908-NEXT: v_writelane_b32 v2, s21, 2
+; GFX908-NEXT: v_writelane_b32 v39, s6, 0
+; GFX908-NEXT: v_writelane_b32 v39, s7, 1
+; GFX908-NEXT: v_writelane_b32 v39, s21, 2
; GFX908-NEXT: s_mov_b32 s22, s14
-; GFX908-NEXT: v_writelane_b32 v2, s22, 3
+; GFX908-NEXT: v_writelane_b32 v39, s22, 3
; GFX908-NEXT: s_mov_b32 s23, s13
-; GFX908-NEXT: v_writelane_b32 v2, s23, 4
+; GFX908-NEXT: v_writelane_b32 v39, s23, 4
; GFX908-NEXT: s_mov_b32 s24, s12
-; GFX908-NEXT: v_writelane_b32 v2, s24, 5
+; GFX908-NEXT: v_writelane_b32 v39, s24, 5
; GFX908-NEXT: s_mov_b64 s[26:27], s[10:11]
-; GFX908-NEXT: v_writelane_b32 v2, s26, 6
-; GFX908-NEXT: v_writelane_b32 v2, s27, 7
-; GFX908-NEXT: v_writelane_b32 v2, s8, 8
-; GFX908-NEXT: v_writelane_b32 v2, s9, 9
-; GFX908-NEXT: v_writelane_b32 v2, s4, 10
+; GFX908-NEXT: v_writelane_b32 v39, s26, 6
+; GFX908-NEXT: v_writelane_b32 v39, s27, 7
+; GFX908-NEXT: v_writelane_b32 v39, s8, 8
+; GFX908-NEXT: v_writelane_b32 v39, s9, 9
+; GFX908-NEXT: v_writelane_b32 v39, s4, 10
; GFX908-NEXT: v_mov_b32_e32 v32, v31
-; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX908-NEXT: s_nop 0
-; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; GFX908-NEXT: v_writelane_b32 v2, s5, 11
+; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX908-NEXT: v_writelane_b32 v39, s5, 11
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX908-NEXT: v_mov_b32_e32 v33, v2
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def v[0:31]
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
; GFX908-NEXT: s_nop 0
-; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
-; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill
+; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def v40
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s11
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX908-NEXT: v_mov_b32_e32 v40, v33
-; GFX908-NEXT: s_mov_b64 exec, s[34:35]
-; GFX908-NEXT: v_writelane_b32 v40, s11, 12
+; GFX908-NEXT: v_writelane_b32 v39, s11, 12
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s12
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s12, 13
+; GFX908-NEXT: v_writelane_b32 v39, s12, 13
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s13
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s13, 14
+; GFX908-NEXT: v_writelane_b32 v39, s13, 14
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s14
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s14, 15
+; GFX908-NEXT: v_writelane_b32 v39, s14, 15
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s15
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s15, 16
+; GFX908-NEXT: v_writelane_b32 v39, s15, 16
; GFX908-NEXT: s_getpc_b64 s[10:11]
; GFX908-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4
; GFX908-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s16
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s16, 17
+; GFX908-NEXT: v_writelane_b32 v39, s16, 17
; GFX908-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s17
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s17, 18
+; GFX908-NEXT: v_writelane_b32 v39, s17, 18
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s18
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s18, 19
+; GFX908-NEXT: v_writelane_b32 v39, s18, 19
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s19
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s19, 20
+; GFX908-NEXT: v_writelane_b32 v39, s19, 20
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s20
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_writelane_b32 v40, s20, 21
+; GFX908-NEXT: v_writelane_b32 v39, s20, 21
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
-; GFX908-NEXT: v_writelane_b32 v40, s10, 22
-; GFX908-NEXT: v_writelane_b32 v40, s11, 23
+; GFX908-NEXT: v_writelane_b32 v39, s10, 22
+; GFX908-NEXT: v_writelane_b32 v39, s11, 23
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
-; GFX908-NEXT: v_readlane_b32 s16, v40, 22
+; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX908-NEXT: s_mov_b64 exec, s[34:35]
+; GFX908-NEXT: v_readlane_b32 s16, v39, 22
; GFX908-NEXT: s_mov_b32 s12, s24
; GFX908-NEXT: s_mov_b32 s13, s23
; GFX908-NEXT: s_mov_b32 s14, s22
; GFX908-NEXT: v_mov_b32_e32 v31, v32
; GFX908-NEXT: s_mov_b32 s15, s21
; GFX908-NEXT: s_mov_b64 s[10:11], s[26:27]
-; GFX908-NEXT: v_readlane_b32 s17, v40, 23
-; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX908-NEXT: v_readlane_b32 s17, v39, 23
+; GFX908-NEXT: v_mov_b32_e32 v40, v32
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
-; GFX908-NEXT: v_readlane_b32 s11, v40, 12
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: v_readlane_b32 s11, v39, 12
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s11
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s12, v40, 13
+; GFX908-NEXT: v_readlane_b32 s12, v39, 13
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s12
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s13, v40, 14
+; GFX908-NEXT: v_readlane_b32 s13, v39, 14
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s13
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s14, v40, 15
+; GFX908-NEXT: v_readlane_b32 s14, v39, 15
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s14
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s15, v40, 16
+; GFX908-NEXT: v_readlane_b32 s15, v39, 16
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s15
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s16, v40, 17
+; GFX908-NEXT: v_readlane_b32 s16, v39, 17
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s16
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s17, v40, 18
+; GFX908-NEXT: v_readlane_b32 s17, v39, 18
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s17
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s18, v40, 19
+; GFX908-NEXT: v_readlane_b32 s18, v39, 19
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s18
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s19, v40, 20
+; GFX908-NEXT: v_readlane_b32 s19, v39, 20
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s19
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s20, v40, 21
+; GFX908-NEXT: v_readlane_b32 s20, v39, 21
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s20
; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s21
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s21, 24
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s22
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s22, 25
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s23
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s23, 26
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s24
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s24, 27
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s25
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s25, 28
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s26
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s26, 29
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s27
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s27, 30
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s28
; GFX908-NEXT: ;;#ASMEND
+; GFX908-NEXT: v_writelane_b32 v39, s28, 31
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def s29
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX908-NEXT: v_writelane_b32 v40, s21, 24
-; GFX908-NEXT: v_writelane_b32 v40, s22, 25
-; GFX908-NEXT: v_writelane_b32 v40, s23, 26
-; GFX908-NEXT: v_writelane_b32 v40, s24, 27
-; GFX908-NEXT: v_writelane_b32 v40, s25, 28
-; GFX908-NEXT: v_writelane_b32 v40, s26, 29
-; GFX908-NEXT: v_writelane_b32 v40, s27, 30
-; GFX908-NEXT: v_writelane_b32 v40, s28, 31
-; GFX908-NEXT: v_writelane_b32 v40, s29, 32
-; GFX908-NEXT: v_readlane_b32 s4, v40, 10
-; GFX908-NEXT: v_readlane_b32 s6, v40, 0
-; GFX908-NEXT: v_readlane_b32 s8, v40, 8
-; GFX908-NEXT: v_readlane_b32 s10, v40, 6
-; GFX908-NEXT: v_readlane_b32 s16, v40, 22
-; GFX908-NEXT: v_readlane_b32 s12, v40, 5
-; GFX908-NEXT: v_readlane_b32 s13, v40, 4
-; GFX908-NEXT: v_readlane_b32 s14, v40, 3
-; GFX908-NEXT: v_readlane_b32 s15, v40, 2
-; GFX908-NEXT: v_readlane_b32 s5, v40, 11
-; GFX908-NEXT: v_readlane_b32 s7, v40, 1
-; GFX908-NEXT: v_readlane_b32 s9, v40, 9
-; GFX908-NEXT: v_readlane_b32 s11, v40, 7
-; GFX908-NEXT: v_readlane_b32 s17, v40, 23
+; GFX908-NEXT: v_writelane_b32 v39, s29, 32
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
+; GFX908-NEXT: v_readlane_b32 s4, v39, 10
+; GFX908-NEXT: v_readlane_b32 s6, v39, 0
+; GFX908-NEXT: v_readlane_b32 s8, v39, 8
+; GFX908-NEXT: v_readlane_b32 s10, v39, 6
+; GFX908-NEXT: v_readlane_b32 s16, v39, 22
+; GFX908-NEXT: v_readlane_b32 s12, v39, 5
+; GFX908-NEXT: v_mov_b32_e32 v31, v40
+; GFX908-NEXT: v_readlane_b32 s13, v39, 4
+; GFX908-NEXT: v_readlane_b32 s14, v39, 3
+; GFX908-NEXT: v_readlane_b32 s15, v39, 2
+; GFX908-NEXT: v_readlane_b32 s5, v39, 11
+; GFX908-NEXT: v_readlane_b32 s7, v39, 1
+; GFX908-NEXT: v_readlane_b32 s9, v39, 9
+; GFX908-NEXT: v_readlane_b32 s11, v39, 7
+; GFX908-NEXT: v_readlane_b32 s17, v39, 23
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
+; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
-; GFX908-NEXT: v_readlane_b32 s21, v40, 24
+; GFX908-NEXT: s_waitcnt vmcnt(0)
+; GFX908-NEXT: v_readlane_b32 s4, v39, 10
+; GFX908-NEXT: v_readlane_b32 s6, v39, 0
+; GFX908-NEXT: v_readlane_b32 s8, v39, 8
+; GFX908-NEXT: v_readlane_b32 s10, v39, 6
+; GFX908-NEXT: v_readlane_b32 s16, v39, 22
+; GFX908-NEXT: v_readlane_b32 s5, v39, 11
+; GFX908-NEXT: v_readlane_b32 s7, v39, 1
+; GFX908-NEXT: v_readlane_b32 s9, v39, 9
+; GFX908-NEXT: v_readlane_b32 s11, v39, 7
+; GFX908-NEXT: v_readlane_b32 s12, v39, 5
+; GFX908-NEXT: v_readlane_b32 s13, v39, 4
+; GFX908-NEXT: v_readlane_b32 s14, v39, 3
+; GFX908-NEXT: v_readlane_b32 s15, v39, 2
+; GFX908-NEXT: v_mov_b32_e32 v31, v40
+; GFX908-NEXT: v_readlane_b32 s17, v39, 23
+; GFX908-NEXT: v_readlane_b32 s21, v39, 24
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s21
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s22, v40, 25
+; GFX908-NEXT: v_readlane_b32 s22, v39, 25
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s22
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s23, v40, 26
+; GFX908-NEXT: v_readlane_b32 s23, v39, 26
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s23
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s24, v40, 27
+; GFX908-NEXT: v_readlane_b32 s24, v39, 27
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s24
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s25, v40, 28
+; GFX908-NEXT: v_readlane_b32 s25, v39, 28
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s25
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s26, v40, 29
+; GFX908-NEXT: v_readlane_b32 s26, v39, 29
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s26
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s27, v40, 30
+; GFX908-NEXT: v_readlane_b32 s27, v39, 30
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s27
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s28, v40, 31
+; GFX908-NEXT: v_readlane_b32 s28, v39, 31
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s28
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_readlane_b32 s29, v40, 32
+; GFX908-NEXT: v_readlane_b32 s29, v39, 32
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; use s29
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX908-NEXT: v_readlane_b32 s4, v40, 10
-; GFX908-NEXT: v_readlane_b32 s6, v40, 0
-; GFX908-NEXT: v_readlane_b32 s8, v40, 8
-; GFX908-NEXT: v_readlane_b32 s10, v40, 6
-; GFX908-NEXT: v_readlane_b32 s16, v40, 22
-; GFX908-NEXT: v_readlane_b32 s5, v40, 11
-; GFX908-NEXT: v_readlane_b32 s7, v40, 1
-; GFX908-NEXT: v_readlane_b32 s9, v40, 9
-; GFX908-NEXT: v_readlane_b32 s11, v40, 7
-; GFX908-NEXT: v_readlane_b32 s12, v40, 5
-; GFX908-NEXT: v_readlane_b32 s13, v40, 4
-; GFX908-NEXT: v_readlane_b32 s14, v40, 3
-; GFX908-NEXT: v_readlane_b32 s15, v40, 2
-; GFX908-NEXT: v_readlane_b32 s17, v40, 23
-; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX908-NEXT: s_mov_b64 exec, s[34:35]
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112
@@ -737,37 +736,34 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, 1
-; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: v_readlane_b32 s31, v0, 0
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_mov_b64 exec, 1
-; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: v_readlane_b32 s30, v0, 0
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload
-; GFX908-NEXT: ; kill: killed $vgpr40
+; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: v_readfirstlane_b32 s4, v0
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: v_readfirstlane_b32 s34, v0
-; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: v_readfirstlane_b32 s35, v0
; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload
-; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload
-; GFX908-NEXT: s_mov_b64 exec, -1
-; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload
+; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX908-NEXT: s_mov_b64 exec, s[6:7]
; GFX908-NEXT: s_addk_i32 s32, 0xd400
; GFX908-NEXT: s_mov_b32 s33, s4
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
index 447a8bf9956f3e..fe01728c005633 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir
@@ -1,5 +1,5 @@
-# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
-# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s
+# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s
# FIXME: We should not produce a verifier error after erroring
diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
index 19cc60963e9007..f7f5bd56fa6f16 100644
--- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll
@@ -243,350 +243,345 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0: ; %bb.0: ; %_udiv-special-cases
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3
+; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: s_mov_b32 s4, 63
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v13
-; GFX9-O0-NEXT: v_ashrrev_i64 v[3:4], s4, v[3:4]
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_ashrrev_i64 v[2:3], s4, v[2:3]
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT: s_waitcnt vmcnt(4)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1
+; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1
; GFX9-O0-NEXT: s_mov_b32 s10, s6
-; GFX9-O0-NEXT: v_writelane_b32 v0, s10, 2
+; GFX9-O0-NEXT: v_writelane_b32 v30, s10, 2
; GFX9-O0-NEXT: s_mov_b32 s11, s7
-; GFX9-O0-NEXT: v_writelane_b32 v0, s11, 3
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v10, vcc, s10, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v4, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v1, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v2, vcc
+; GFX9-O0-NEXT: v_writelane_b32 v30, s11, 3
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, s10, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v4, v3, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v0, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v1, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
+; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[12:13], s[4:5]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5]
+; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[11:12], s[4:5]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v4
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5]
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v17
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v19
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v14, vcc, s10, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v11, v10, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v8, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v13, v9, vcc
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, s10, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v10, v9, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v7, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v8, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
+; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[18:19], s[4:5]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v14
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v12, s[4:5]
+; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[17:18], s[4:5]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v9, v13, s[4:5]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v8, v12, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v19
-; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[13:14], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
-; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[13:14], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v18
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[11:12], s[8:9]
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9]
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
; GFX9-O0-NEXT: s_mov_b32 s13, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v8, v8, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
-; GFX9-O0-NEXT: v_min_u32_e64 v8, v8, v9
+; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
+; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8
; GFX9-O0-NEXT: s_mov_b32 s12, 0
; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v10
-; GFX9-O0-NEXT: v_min_u32_e64 v13, v7, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9
+; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr14
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12
; GFX9-O0-NEXT: s_mov_b32 s16, s14
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
; GFX9-O0-NEXT: s_mov_b32 s18, s15
-; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[16:17], v10, s16
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s18
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[16:17], v7, v11, s[16:17]
-; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v12, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[8:9]
+; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17]
+; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[5:6], s[8:9]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9]
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr16
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT: v_min_u32_e64 v12, v5, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr13
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11
; GFX9-O0-NEXT: s_mov_b32 s12, s14
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
; GFX9-O0-NEXT: s_mov_b32 s14, s15
-; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[12:13], v11, s12
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s14
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v12, s[12:13]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[12:13], v10, s12
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, s14
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: s_mov_b32 s14, s13
-; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
+; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 4
-; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 5
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -594,67 +589,66 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB0_8
; GFX9-O0-NEXT: .LBB0_1: ; %Flow
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 6
-; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 7
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: ; %bb.2: ; %Flow
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_5
; GFX9-O0-NEXT: .LBB0_3: ; %Flow2
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 4
-; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 5
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_9
; GFX9-O0-NEXT: .LBB0_4: ; %udiv-loop-exit
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b32 s4, 1
; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[0:1]
@@ -687,123 +681,117 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_3
; GFX9-O0-NEXT: .LBB0_5: ; %Flow1
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8
-; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_4
; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while
; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 10
-; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 11
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10
+; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11
; GFX9-O0-NEXT: s_mov_b32 s4, 63
-; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29
; GFX9-O0-NEXT: s_mov_b32 s5, 1
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3]
; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7]
; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30
-; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27
; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26
; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
-; GFX9-O0-NEXT: s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15
; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
@@ -823,22 +811,22 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
@@ -854,149 +842,149 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14
; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12
; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 6
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 7
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 10
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 11
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6
; GFX9-O0-NEXT: s_branch .LBB0_1
; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
; GFX9-O0-NEXT: s_mov_b32 s6, 64
; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23
; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6
; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s6, 0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19]
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s7
@@ -1015,12 +1003,12 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4
@@ -1032,429 +1020,428 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s9
; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8
; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7
; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 10
-; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 11
+; GFX9-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB0_6
; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
; GFX9-O0-NEXT: s_mov_b32 s4, s7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s6
; GFX9-O0-NEXT: s_mov_b32 s9, s7
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4
; GFX9-O0-NEXT: s_mov_b32 s10, 63
-; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11]
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 8
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 9
+; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5
; GFX9-O0-NEXT: s_branch .LBB0_7
; GFX9-O0-NEXT: .LBB0_9: ; %udiv-end
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23]
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b32 s4, 32
; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[5:6]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[16:17]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17
-; GFX9-O0-NEXT: v_mul_lo_u32 v3, v1, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v20
+; GFX9-O0-NEXT: v_mul_lo_u32 v8, v1, v0
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[17:18], s4, v[17:18]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v17
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mul_lo_u32 v2, v5, v2
-; GFX9-O0-NEXT: v_mad_u64_u32 v[17:18], s[6:7], v5, v0, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v18
-; GFX9-O0-NEXT: v_add3_u32 v2, v0, v2, v3
+; GFX9-O0-NEXT: v_lshrrev_b64 v[20:21], s4, v[20:21]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v16
+; GFX9-O0-NEXT: v_mul_lo_u32 v5, v2, v5
+; GFX9-O0-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v2, v0, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17
+; GFX9-O0-NEXT: v_add3_u32 v8, v0, v5, v8
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0
+; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 killed $vgpr16_vgpr17 killed $exec
; GFX9-O0-NEXT: s_mov_b32 s5, 0
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v0
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT: v_or_b32_e64 v16, v5, v8
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
+; GFX9-O0-NEXT: v_lshrrev_b64 v[8:9], s4, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14
+; GFX9-O0-NEXT: v_mul_lo_u32 v9, v8, v5
+; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v18
-; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v17
-; GFX9-O0-NEXT: v_or_b32_e64 v17, v2, v3
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11
-; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v6
-; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s4, v[11:12]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v19
-; GFX9-O0-NEXT: v_mul_lo_u32 v11, v11, v0
-; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v2, v0, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v20
-; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11
+; GFX9-O0-NEXT: v_mul_lo_u32 v14, v14, v0
+; GFX9-O0-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v8, v0, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19
+; GFX9-O0-NEXT: v_add3_u32 v8, v8, v9, v14
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 killed $vgpr18_vgpr19 killed $exec
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19
-; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18
-; GFX9-O0-NEXT: v_add_co_u32_e64 v17, s[6:7], v11, v12
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v2
-; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v6, v1, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v14
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v14, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
+; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT: v_add_co_u32_e64 v16, s[6:7], v14, v15
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v8, v9, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v1, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v12
-; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v19
-; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v6, v5, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9
+; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_or_b32_e64 v20, v9, v14
+; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v8
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v2, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v21
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v19
-; GFX9-O0-NEXT: v_or_b32_e64 v23, v11, v12
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v6
-; GFX9-O0-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v0, v5, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18
+; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT: v_or_b32_e64 v22, v8, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v0, v2, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22
-; GFX9-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v5, v20
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v19, s[6:7], v6, v19, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v19
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19
+; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[6:7], v8, v9
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff
; GFX9-O0-NEXT: s_mov_b32 s8, s7
-; GFX9-O0-NEXT: v_and_b32_e64 v19, v19, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5
+; GFX9-O0-NEXT: v_and_b32_e64 v2, v2, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
-; GFX9-O0-NEXT: v_and_b32_e64 v21, v20, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19
-; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v0, v1, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v19
+; GFX9-O0-NEXT: v_and_b32_e64 v18, v5, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v0, v1, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v22
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20
+; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v1
-; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_or_b32_e64 v23, v1, v19
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22
-; GFX9-O0-NEXT: v_add_co_u32_e64 v0, s[6:7], v0, v20
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v19, s[6:7], v1, v19, s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s4, v[22:23]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v22
+; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19
+; GFX9-O0-NEXT: v_add_co_u32_e64 v0, s[6:7], v0, v5
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v1, v2, s[6:7]
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v19
-; GFX9-O0-NEXT: v_lshrrev_b64 v[21:22], s4, v[0:1]
-; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22
-; GFX9-O0-NEXT: v_add_co_u32_e64 v19, s[6:7], v19, v20
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v6, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v19
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v20
-; GFX9-O0-NEXT: v_add_co_u32_e64 v19, s[6:7], v5, v6
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[18:19], s4, v[0:1]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], s4, v[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19
+; GFX9-O0-NEXT: v_add_co_u32_e64 v18, s[6:7], v8, v9
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19
+; GFX9-O0-NEXT: v_add_co_u32_e64 v18, s[6:7], v8, v9
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18
-; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v6
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v17
+; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[6:7], v8, v9
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2
; GFX9-O0-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v14
; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v12
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v11, vcc
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v9
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v8, vcc
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
@@ -1462,53 +1449,48 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: v_xor_b32_e64 v9, v6, v5
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6
+; GFX9-O0-NEXT: v_xor_b32_e64 v8, v5, v4
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v6
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10
+; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v7
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v7, vcc, v7, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7
; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT: ; kill: killed $vgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
@@ -1725,266 +1707,258 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0: ; %bb.0: ; %_udiv-special-cases
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v3
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(7)
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: s_waitcnt vmcnt(6)
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: v_or_b32_e64 v1, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v7, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
-; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: v_or_b32_e64 v15, v4, v2
+; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v9, v3, v1
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7]
+; GFX9-O0-NEXT: v_or_b32_e64 v14, v3, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v8, v2, v0
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5
; GFX9-O0-NEXT: s_mov_b32 s9, 32
-; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
-; GFX9-O0-NEXT: v_min_u32_e64 v6, v6, v7
+; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6
+; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6
; GFX9-O0-NEXT: s_mov_b32 s8, 0
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8
-; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7
+; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
; GFX9-O0-NEXT: s_mov_b32 s12, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15
; GFX9-O0-NEXT: s_mov_b32 s14, s11
-; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[12:13], v8, s12
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, s14
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13]
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v6, v7, s[12:13]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[12:13]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13]
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2
-; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6
+; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3
-; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9
-; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4
-; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2
+; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9
+; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3
+; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr9
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14
; GFX9-O0-NEXT: s_mov_b32 s8, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15
; GFX9-O0-NEXT: s_mov_b32 s10, s11
-; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[8:9], v11, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s10
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[8:9], v10, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
; GFX9-O0-NEXT: s_mov_b32 s10, s6
; GFX9-O0-NEXT: s_mov_b32 s11, s7
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
-; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15]
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9]
-; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1
+; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
+; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1
; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1
; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
; GFX9-O0-NEXT: s_mov_b32 s14, s13
-; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14
+; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
-; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9]
+; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
; GFX9-O0-NEXT: ; implicit-def: $sgpr12
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9]
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9]
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
-; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 2
-; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 3
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 2
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 3
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -1992,11 +1966,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB1_8
; GFX9-O0-NEXT: .LBB1_1: ; %Flow
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 4
-; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 5
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: ; %bb.2: ; %Flow
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
@@ -2025,20 +1999,19 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB1_5
; GFX9-O0-NEXT: .LBB1_3: ; %Flow2
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 2
-; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 3
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2085,13 +2058,6 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB1_3
; GFX9-O0-NEXT: .LBB1_5: ; %Flow1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 6
-; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 7
-; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
@@ -2100,9 +2066,15 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6
+; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7
+; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2116,92 +2088,87 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_branch .LBB1_4
; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while
; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 8
-; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 9
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
+; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8
+; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9
; GFX9-O0-NEXT: s_mov_b32 s4, 63
-; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30
+; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29
; GFX9-O0-NEXT: s_mov_b32 s5, 1
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23]
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
-; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22
+; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3]
; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1]
-; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7]
+; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7]
; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30
-; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27
; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26
; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
-; GFX9-O0-NEXT: s_waitcnt vmcnt(8)
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15
; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
@@ -2221,22 +2188,22 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0
; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21
-; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20
-; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23
-; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19
-; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20
+; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19
+; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22
+; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
@@ -2252,66 +2219,66 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4
; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9
+; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21
-; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21
-; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20
-; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19
-; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13]
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20
+; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20
+; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19
+; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18
+; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13]
; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12
-; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
+; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 4
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 5
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 4
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 5
; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 8
-; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 9
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2349,52 +2316,52 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_waitcnt vmcnt(10)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22]
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7
; GFX9-O0-NEXT: s_mov_b32 s6, 64
; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24
+; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23
; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22
; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6
; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6
-; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24
+; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s6, 0
; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7]
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19]
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s7
@@ -2413,12 +2380,12 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1
; GFX9-O0-NEXT: s_mov_b32 s5, s8
; GFX9-O0-NEXT: s_mov_b32 s4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5
; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc
; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4
@@ -2430,7 +2397,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2443,10 +2410,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8
; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7
; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 8
-; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 9
+; GFX9-O0-NEXT: s_waitcnt vmcnt(4)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8
+; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
@@ -2474,403 +2442,396 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_branch .LBB1_6
; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
; GFX9-O0-NEXT: s_mov_b32 s5, s6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
+; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
; GFX9-O0-NEXT: s_mov_b32 s4, s7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s8, s6
; GFX9-O0-NEXT: s_mov_b32 s9, s7
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9
-; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f
-; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12]
-; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6
+; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
; GFX9-O0-NEXT: s_mov_b32 s4, 64
-; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3
-; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
-; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4
+; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
+; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4
; GFX9-O0-NEXT: s_mov_b32 s10, 63
-; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5]
+; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s10, 0
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11]
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11]
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
; GFX9-O0-NEXT: ; implicit-def: $sgpr10
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
-; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5]
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5]
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
-; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
-; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2
-; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7]
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8
+; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
+; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec
; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5]
; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7]
-; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 6
-; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 7
+; GFX9-O0-NEXT: s_waitcnt vmcnt(16)
+; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6
+; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_cbranch_execz .LBB1_5
; GFX9-O0-NEXT: s_branch .LBB1_7
; GFX9-O0-NEXT: .LBB1_9: ; %udiv-end
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b32 s4, 32
; GFX9-O0-NEXT: s_waitcnt vmcnt(2)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13
-; GFX9-O0-NEXT: v_mul_lo_u32 v5, v6, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
+; GFX9-O0-NEXT: v_mul_lo_u32 v4, v5, v2
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], s4, v[13:14]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mul_lo_u32 v3, v7, v3
-; GFX9-O0-NEXT: v_mad_u64_u32 v[13:14], s[6:7], v7, v2, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v5
+; GFX9-O0-NEXT: v_lshrrev_b64 v[12:13], s4, v[12:13]
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mul_lo_u32 v3, v6, v3
+; GFX9-O0-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v6, v2, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13
+; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v4
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT: v_lshlrev_b64 v[17:18], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 killed $vgpr13_vgpr14 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4
+; GFX9-O0-NEXT: v_lshlrev_b64 v[3:4], s4, v[2:3]
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 killed $vgpr12_vgpr13 killed $exec
; GFX9-O0-NEXT: s_mov_b32 s5, 0
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
-; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
-; GFX9-O0-NEXT: v_or_b32_e64 v13, v3, v5
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v2
-; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[15:16]
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11
-; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v8
-; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s4, v[11:12]
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v15
-; GFX9-O0-NEXT: v_mul_lo_u32 v11, v11, v5
-; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v2, v5, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v16
-; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13
+; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12
+; GFX9-O0-NEXT: v_or_b32_e64 v12, v3, v4
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[14:15]
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10
+; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v7
+; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s4, v[10:11]
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
+; GFX9-O0-NEXT: v_mul_lo_u32 v10, v10, v4
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v2, v4, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s6
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v16
-; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v14
-; GFX9-O0-NEXT: v_add_co_u32_e64 v13, s[6:7], v11, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
+; GFX9-O0-NEXT: v_add_co_u32_e64 v12, s[6:7], v10, v11
; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v14, v2
-; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v8, v6, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
+; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v5, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12
-; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16
-; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v11
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v14
; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
-; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v8, v7, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v6, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v12
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16
-; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v17
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15
-; GFX9-O0-NEXT: v_or_b32_e64 v19, v11, v12
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v8
-; GFX9-O0-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v5, v7, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v16
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14
+; GFX9-O0-NEXT: v_or_b32_e64 v18, v10, v11
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v7
+; GFX9-O0-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v4, v6, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v16, v11
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v19
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18
-; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[6:7], v7, v16
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v15, s[6:7], v8, v15, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-O0-NEXT: v_add_co_u32_e64 v6, s[6:7], v6, v15
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v14, s[6:7], v7, v14, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff
; GFX9-O0-NEXT: s_mov_b32 s8, s7
-; GFX9-O0-NEXT: v_and_b32_e64 v15, v15, s8
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v7
+; GFX9-O0-NEXT: v_and_b32_e64 v14, v14, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
-; GFX9-O0-NEXT: v_and_b32_e64 v17, v16, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15
-; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v5, v6, 0
-; GFX9-O0-NEXT: v_mov_b32_e32 v19, v15
+; GFX9-O0-NEXT: v_and_b32_e64 v16, v15, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14
+; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v4, v5, 0
+; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v19
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr7
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s6
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v6
-; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v16
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v19
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_or_b32_e64 v19, v6, v15
-; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5
+; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15]
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v15
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_or_b32_e64 v18, v5, v14
+; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v18
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v20
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18
-; GFX9-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v5, v16
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v15, s[6:7], v6, v15, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v15
-; GFX9-O0-NEXT: v_lshrrev_b64 v[17:18], s4, v[5:6]
-; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v15, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18
-; GFX9-O0-NEXT: v_add_co_u32_e64 v15, s[6:7], v15, v16
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v7, v8, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v4, v15
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v14, s[6:7], v5, v14, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
+; GFX9-O0-NEXT: v_lshrrev_b64 v[16:17], s4, v[4:5]
+; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s4, v[6:7]
+; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v17
+; GFX9-O0-NEXT: v_add_co_u32_e64 v14, s[6:7], v14, v15
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v6, v7, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT: v_add_co_u32_e64 v15, s[6:7], v7, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15
+; GFX9-O0-NEXT: v_add_co_u32_e64 v14, s[6:7], v6, v7
; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7]
-; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v13
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v16
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14
-; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v8
-; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v3, v7, s[6:7]
+; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13
+; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v7
+; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v3, v6, s[6:7]
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7
-; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], s4, v[5:6]
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5
-; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6
+; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], s4, v[4:5]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
-; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7
-; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: ; implicit-def: $sgpr6
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5
+; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11
+; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
+; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5
+; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10
-; GFX9-O0-NEXT: v_sub_co_u32_e32 v7, vcc, v7, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9
+; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7
; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc
-; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v5, vcc
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
; GFX9-O0-NEXT: ; implicit-def: $sgpr5
-; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5
-; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5
+; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3
; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6]
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-O0-NEXT: ; kill: killed $vgpr4
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5
+; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4]
+; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 248a9e2ddb6360..4f6ea44ccf68bd 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -7,12 +7,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b32_e32
- ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
@@ -31,16 +31,12 @@ body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_mov_b32_e32_impuse
; GCN: $m0 = IMPLICIT_DEF
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
+ ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
$m0 = IMPLICIT_DEF
%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
@@ -59,12 +55,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def
- ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
; GCN-NEXT: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
@@ -82,12 +78,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b32_e64
- ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
%1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
@@ -105,16 +101,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
- ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp1]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp2]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
@@ -130,12 +122,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_accvgpr_read_b32
- ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ACCVGPR_READ_B32_e64_:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_READ_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_READ_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
%1:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
@@ -151,12 +143,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_accvgpr_write_b32
- ; GCN: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
- ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
- ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+ ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
%1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
@@ -172,12 +164,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mov_b64_pseudo
- ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
%1:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
@@ -193,12 +185,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -216,16 +208,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_fp_except
- ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -245,16 +233,12 @@ body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def
; GCN: $mode = IMPLICIT_DEF
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
$mode = IMPLICIT_DEF
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
@@ -271,12 +255,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
@@ -294,12 +278,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
@@ -317,16 +301,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp
- ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_dpp:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_dpp1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp1]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F64_dpp2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp2]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
@@ -344,16 +324,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def
- ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
%1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
@@ -371,16 +347,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use
- ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
+ ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
%1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
@@ -396,12 +368,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32
- ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F64_I32_e32_1:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F64_I32_e32_2:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
@@ -417,12 +389,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f32_f64_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
@@ -438,12 +410,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f64_f32_e32
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_CVT_F64_F32_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F64_F32_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F64_F32_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
@@ -459,12 +431,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_u32_f64_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_U32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_U32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_U32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
@@ -480,12 +452,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f64_u32_e32
- ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_CVT_F64_U32_e32_:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F64_U32_e32_1:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F64_U32_e32_2:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
@@ -501,12 +473,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f32_i32_e32
- ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_I32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_I32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
@@ -522,12 +494,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f32_i32_sdwa
- ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_I32_sdwa:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_I32_sdwa1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_I32_sdwa2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -547,16 +519,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cvt_f32_i32_sdwa_dst_unused_preserve
- ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_I32_sdwa:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa]](tied-def 0)
+ ; GCN-NEXT: [[V_CVT_F32_I32_sdwa1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa1]](tied-def 0)
+ ; GCN-NEXT: [[V_CVT_F32_I32_sdwa2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa2]](tied-def 0)
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %1:vgpr_32(tied-def 0)
%2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %2:vgpr_32(tied-def 0)
@@ -572,12 +540,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f32_u32_e32
- ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_U32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_U32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
@@ -593,12 +561,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_u32_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_U32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
@@ -614,12 +582,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_i32_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -635,12 +603,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f32_f16_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
@@ -656,12 +624,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_rpi_i32_f32_e32
- ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_RPI_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_RPI_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_RPI_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -677,12 +645,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_flr_i32_f32_e32
- ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_FLR_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_FLR_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_FLR_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -698,12 +666,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_off_f32_i4_e32
- ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_OFF_F32_I4_e32_:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_OFF_F32_I4_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_OFF_F32_I4_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
@@ -719,12 +687,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_f32_ubyte0_e32
- ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_F32_UBYTE0_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_UBYTE0_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_F32_UBYTE0_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
@@ -740,12 +708,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fract_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FRACT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FRACT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FRACT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
@@ -761,12 +729,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_trunc_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_TRUNC_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_TRUNC_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_TRUNC_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
@@ -782,12 +750,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ceil_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CEIL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CEIL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CEIL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
@@ -803,12 +771,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_rndne_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_RNDNE_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RNDNE_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RNDNE_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
@@ -824,12 +792,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_floor_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FLOOR_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FLOOR_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FLOOR_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
@@ -845,12 +813,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_exp_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_EXP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_EXP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_EXP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
@@ -866,12 +834,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_log_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LOG_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LOG_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LOG_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
@@ -887,12 +855,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_rcp_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_RCP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RCP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RCP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
@@ -908,12 +876,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_rcp_iflag_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_RCP_IFLAG_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RCP_IFLAG_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RCP_IFLAG_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
@@ -929,12 +897,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_rsq_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_RSQ_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RSQ_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RSQ_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
@@ -950,12 +918,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sqrt_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SQRT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SQRT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SQRT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
@@ -971,12 +939,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_rcp_f64_e32
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_RCP_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RCP_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RCP_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
@@ -992,12 +960,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_rsq_f64_e32
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_RSQ_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RSQ_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_RSQ_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
@@ -1013,12 +981,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sqrt_f64_e32
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_SQRT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SQRT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SQRT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
@@ -1034,12 +1002,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sin_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SIN_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SIN_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SIN_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
@@ -1055,12 +1023,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cos_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_COS_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_COS_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_COS_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
@@ -1076,12 +1044,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_not_b32_e32
- ; GCN: renamable $vgpr0 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_NOT_B32_e32_2:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
@@ -1097,12 +1065,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_bfrev_b32_e32
- ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_BFREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_BFREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_BFREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
@@ -1118,12 +1086,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ffbh_u32_e32
- ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FFBH_U32_e32_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FFBH_U32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FFBH_U32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
@@ -1139,12 +1107,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ffbl_b32_e32
- ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FFBL_B32_e32_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FFBL_B32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FFBL_B32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
@@ -1160,12 +1128,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ffbh_i32_e32
- ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FFBH_I32_e32_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FFBH_I32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FFBH_I32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
@@ -1181,12 +1149,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f64_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FREXP_EXP_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_EXP_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_EXP_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -1202,12 +1170,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_frexp_mant_f64_e32
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_FREXP_MANT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_MANT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_MANT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
@@ -1223,12 +1191,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fract_f64_e32
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_FRACT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FRACT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FRACT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
%1:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
@@ -1244,12 +1212,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FREXP_EXP_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_EXP_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_EXP_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
@@ -1265,12 +1233,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_frexp_mant_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FREXP_MANT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_MANT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FREXP_MANT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
@@ -1286,12 +1254,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_exp_legacy_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_EXP_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_EXP_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_EXP_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
@@ -1307,12 +1275,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_log_legacy_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LOG_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LOG_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LOG_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
@@ -1328,12 +1296,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sat_pk_u8_i16_e32
- ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SAT_PK_U8_I16_e32_:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SAT_PK_U8_I16_e32_1:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SAT_PK_U8_I16_e32_2:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
%1:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
@@ -1349,12 +1317,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_accvgpr_mov_b32
- ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
- ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
- ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+ ; GCN: [[V_ACCVGPR_MOV_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_MOV_B32_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+ ; GCN-NEXT: [[V_ACCVGPR_MOV_B32_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_2]]
; GCN-NEXT: S_ENDPGM 0
%0:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
%1:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
@@ -1372,16 +1340,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cndmask_b32_e32
- ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: [[V_CNDMASK_B32_e32_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: [[V_CNDMASK_B32_e32_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
%2:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
@@ -1399,16 +1363,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cndmask_b32_sdwa
- ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: [[V_CNDMASK_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: [[V_CNDMASK_B32_sdwa2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
%2:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
@@ -1426,16 +1386,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_cndmask_b32_dpp
- ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: [[V_CNDMASK_B32_dpp1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp1]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: [[V_CNDMASK_B32_dpp2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp2]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
%2:vgpr_32 = V_CNDMASK_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
@@ -1451,12 +1407,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cndmask_b32_e64
- ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+ ; GCN-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
%2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
@@ -1472,12 +1428,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_madmk_f32
- ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADMK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADMK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 3, 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1493,12 +1449,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1514,12 +1470,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_f32_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -1535,12 +1491,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_f32_sdwa
- ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_F32_sdwa:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_sdwa1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_sdwa2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -1558,16 +1514,12 @@ machineFunctionInfo:
body: |
bb.0:
; GCN-LABEL: name: test_no_remat_v_add_f32_dpp
- ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_F32_dpp:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_dpp1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp1]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F32_dpp2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp2]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
@@ -1583,12 +1535,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sub_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SUB_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SUB_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SUB_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1604,12 +1556,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_subrev_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SUBREV_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SUBREV_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_SUBREV_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1625,12 +1577,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_legacy_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MUL_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MUL_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1646,12 +1598,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1667,12 +1619,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_i32_i24_e32
- ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_I32_I24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_I32_I24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1688,12 +1640,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_hi_i32_i24_e32
- ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_HI_I32_I24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_I32_I24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1709,12 +1661,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_u32_u24_e32
- ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_U32_U24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_U32_U24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_U32_U24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1730,12 +1682,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_hi_u32_u24_e32
- ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_HI_U32_U24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_U32_U24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_U32_U24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1751,12 +1703,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1772,12 +1724,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1793,12 +1745,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min_i32_e32
- ; GCN: renamable $vgpr0 = V_MIN_I32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN_I32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN_I32_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN_I32_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MIN_I32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1814,12 +1766,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max_i32_e32
- ; GCN: renamable $vgpr0 = V_MAX_I32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX_I32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX_I32_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX_I32_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAX_I32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1835,12 +1787,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min_u32_e32
- ; GCN: renamable $vgpr0 = V_MIN_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN_U32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN_U32_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN_U32_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MIN_U32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1856,12 +1808,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max_u32_e32
- ; GCN: renamable $vgpr0 = V_MAX_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX_U32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX_U32_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX_U32_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAX_U32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1877,12 +1829,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshrrev_b32_e32
- ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LSHRREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHRREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1898,12 +1850,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshlrev_b32_e32
- ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LSHLREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHLREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1919,12 +1871,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ashrrev_i32_e32
- ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ASHRREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ASHRREV_I32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ASHRREV_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1940,12 +1892,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_and_b32_e32
- ; GCN: renamable $vgpr0 = V_AND_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_AND_B32_e32_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_AND_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_AND_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1961,12 +1913,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_or_b32_e32
- ; GCN: renamable $vgpr0 = V_OR_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_OR_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_OR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -1982,12 +1934,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_xor_b32_e32
- ; GCN: renamable $vgpr0 = V_XOR_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_XOR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_XOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_XOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2003,12 +1955,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_madak_f32
- ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %1:vgpr_32, 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADAK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %1:vgpr_32, 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADAK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 3, undef %1:vgpr_32, 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
@@ -2024,12 +1976,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_u32_e32
- ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ADD_U32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ADD_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2045,12 +1997,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sub_u32_e32
- ; GCN: renamable $vgpr0 = V_SUB_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_SUB_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SUB_U32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_SUB_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2066,12 +2018,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_subrev_u32_e32
- ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SUBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SUBREV_U32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_SUBREV_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2087,12 +2039,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_bfm_b32_e32
- ; GCN: renamable $vgpr0 = V_BFM_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_BFM_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFM_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFM_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_BFM_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_BFM_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2108,12 +2060,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_bcnt_u32_b32_e32
- ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_BCNT_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BCNT_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BCNT_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2129,12 +2081,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mbcnt_lo_u32_b32_e32
- ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MBCNT_LO_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2150,12 +2102,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mbcnt_hi_u32_b32_e32
- ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MBCNT_HI_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MBCNT_HI_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MBCNT_HI_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2171,12 +2123,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ldexp_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LDEXP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LDEXP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LDEXP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2192,12 +2144,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_pknorm_i16_f32_e32
- ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_PKNORM_I16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2213,12 +2165,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_pknorm_u16_f32_e32
- ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_PKNORM_U16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2234,12 +2186,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_pkrtz_f16_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_PKRTZ_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2255,12 +2207,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_pk_u16_u32_e32
- ; GCN: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_PK_U16_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PK_U16_U32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PK_U16_U32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2276,12 +2228,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_pk_i16_i32_e32
- ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_PK_I16_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PK_I16_I32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_CVT_PK_I16_I32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2297,12 +2249,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min_legacy_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2318,12 +2270,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max_legacy_f32_e32
- ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2339,12 +2291,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshr_b32_e32
- ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LSHR_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2360,12 +2312,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshl_b32_e32
- ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LSHL_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHL_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHL_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LSHL_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHL_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2381,12 +2333,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ashr_i32_e32
- ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ASHR_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ASHR_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ASHR_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ASHR_I32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ASHR_I32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2402,12 +2354,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_xnor_b32_e32
- ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_XNOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_XNOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_XNOR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_XNOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_XNOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
@@ -2423,12 +2375,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fmamk_f32
- ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMAMK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMAMK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 3, 3, undef %1:vgpr_32, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2444,12 +2396,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fmaak_f32
- ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %1:vgpr_32, 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMAAK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %1:vgpr_32, 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMAAK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 3, undef %1:vgpr_32, 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
@@ -2465,12 +2417,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mad_legacy_f32_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAD_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAD_LEGACY_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAD_LEGACY_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2486,12 +2438,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mad_f32_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2507,12 +2459,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fma_legacy_f32_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FMA_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMA_LEGACY_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMA_LEGACY_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2528,12 +2480,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fma_f32_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2549,12 +2501,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mad_i32_i24_e64
- ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAD_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_MAD_I32_I24_e64_1:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_MAD_I32_I24_e64_2:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -2570,12 +2522,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mad_u32_u24_e64
- ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAD_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_MAD_U32_U24_e64_1:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_MAD_U32_U24_e64_2:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -2591,12 +2543,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lerp_u8_e64
- ; GCN: renamable $vgpr0 = V_LERP_U8_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LERP_U8_e64_:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LERP_U8_e64_1:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LERP_U8_e64_2:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LERP_U8_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LERP_U8_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2612,12 +2564,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_fma_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMA_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMA_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2633,12 +2585,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2654,12 +2606,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2675,12 +2627,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2696,12 +2648,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2717,12 +2669,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_lo_u32_e64
- ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_LO_U32_e64 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_LO_U32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2738,12 +2690,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_hi_u32_e64
- ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_HI_U32_e64 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_HI_U32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2759,12 +2711,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_lo_i32_e64
- ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_LO_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_LO_I32_e64 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_LO_I32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2780,12 +2732,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_mul_hi_i32_e64
- ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MUL_HI_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MUL_HI_I32_e64 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_HI_I32_e64 2, undef %0:vgpr_32, implicit $exec
@@ -2801,12 +2753,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cubeid_f32_e64
- ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CUBEID_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBEID_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBEID_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2822,12 +2774,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cubesc_f32_e64
- ; GCN: renamable $vgpr0 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CUBESC_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBESC_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBESC_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2843,12 +2795,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cubetc_f32_e64
- ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CUBETC_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBETC_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBETC_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2864,12 +2816,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cubema_f32_e64
- ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CUBEMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBEMA_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CUBEMA_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2885,12 +2837,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_bfe_u32_e64
- ; GCN: renamable $vgpr0 = V_BFE_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFE_U32_e64_1:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFE_U32_e64_2:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_BFE_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_BFE_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2906,12 +2858,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_bfe_i32_e64
- ; GCN: renamable $vgpr0 = V_BFE_I32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFE_I32_e64_1:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFE_I32_e64_2:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_BFE_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_BFE_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2927,12 +2879,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_bfi_b32_e64
- ; GCN: renamable $vgpr0 = V_BFI_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFI_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_BFI_B32_e64_2:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_BFI_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_BFI_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2948,12 +2900,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_alignbit_b32_e64
- ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2969,12 +2921,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_alignbyte_b32_e64
- ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ALIGNBYTE_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ALIGNBYTE_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ALIGNBYTE_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -2990,12 +2942,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min3_i32_e64
- ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3011,12 +2963,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min3_u32_e64
- ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MIN3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3032,12 +2984,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max3_i32_e64
- ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3053,12 +3005,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max3_u32_e64
- ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MAX3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3074,12 +3026,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_med3_i32_e64
- ; GCN: renamable $vgpr0 = V_MED3_I32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MED3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MED3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MED3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3095,12 +3047,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_med3_u32_e64
- ; GCN: renamable $vgpr0 = V_MED3_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_MED3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MED3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MED3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3116,12 +3068,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_min3_f32_e64
- ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MIN3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MIN3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3137,12 +3089,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_max3_f32_e64
- ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MAX3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MAX3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3158,12 +3110,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_med3_f32_e64
- ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MED3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MED3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3179,12 +3131,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sad_u8_e64
- ; GCN: renamable $vgpr0 = V_SAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SAD_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_U8_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_U8_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3200,12 +3152,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sad_hi_u8_e64
- ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_HI_U8_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_HI_U8_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3221,12 +3173,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sad_u16_e64
- ; GCN: renamable $vgpr0 = V_SAD_U16_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SAD_U16_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SAD_U16_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SAD_U16_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3242,12 +3194,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sad_u32_e64
- ; GCN: renamable $vgpr0 = V_SAD_U32_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SAD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SAD_U32_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SAD_U32_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3263,12 +3215,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_cvt_pk_u8_f32_e64
- ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_CVT_PK_U8_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_PK_U8_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_CVT_PK_U8_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
@@ -3284,12 +3236,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_div_fixup_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_DIV_FIXUP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_DIV_FIXUP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_DIV_FIXUP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -3305,12 +3257,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ldexp_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LDEXP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_LDEXP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3326,12 +3278,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_msad_u8_e64
- ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_MSAD_U8_e64_:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_MSAD_U8_e64_1:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_MSAD_U8_e64_2:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3347,12 +3299,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_trig_preop_f64_e64
- ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_TRIG_PREOP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_TRIG_PREOP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_TRIG_PREOP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
%2:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3368,12 +3320,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshlrev_b64_e64
- ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 3, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
%2:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
@@ -3389,12 +3341,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshrrev_b64_e64
- ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 3, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
%2:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
@@ -3410,12 +3362,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_ashrrev_i64_e64
- ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 1, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I64_e64_1:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I64_e64_2:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 3, undef %1:vreg_64_align2, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %0:vreg_64_align2, implicit $exec
%2:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %0:vreg_64_align2, implicit $exec
@@ -3431,12 +3383,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_perm_b32_e64
- ; GCN: renamable $vgpr0 = V_PERM_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_PERM_B32_e64_1:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_PERM_B32_e64_2:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_PERM_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_PERM_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3452,12 +3404,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add3_u32_e64
- ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ADD3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ADD3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3473,12 +3425,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_and_or_b32_e64
- ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_AND_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_AND_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3494,12 +3446,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_or3_b32_e64
- ; GCN: renamable $vgpr0 = V_OR3_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_OR3_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_OR3_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_OR3_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_OR3_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3515,12 +3467,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_xad_u32_e64
- ; GCN: renamable $vgpr0 = V_XAD_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_XAD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_XAD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_XAD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_XAD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3536,12 +3488,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_i32_e64
- ; GCN: renamable $vgpr0 = V_ADD_I32_e64 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_I32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ADD_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_ADD_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3557,12 +3509,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_add_lshl_u32_e64
- ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_ADD_LSHL_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ADD_LSHL_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_ADD_LSHL_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3578,12 +3530,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_sub_i32_e64
- ; GCN: renamable $vgpr0 = V_SUB_I32_e64 1, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 1, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SUB_I32_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 2, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: [[V_SUB_I32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 3, undef %1:vgpr_32, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_SUB_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SUB_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
@@ -3599,12 +3551,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshl_add_u32_e64
- ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHL_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHL_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3620,12 +3572,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_v_lshl_or_b32_e64
- ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
- ; GCN-NEXT: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHL_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: [[V_LSHL_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_2]]
; GCN-NEXT: S_ENDPGM 0
%1:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
@@ -3645,13 +3597,14 @@ body: |
; GCN-LABEL: name: test_remat_v_lshlrev_b16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_LSHLREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_LSHLREV_B16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHLREV_B16_e32 2, %0:vgpr_32, implicit $exec
@@ -3670,13 +3623,14 @@ body: |
; GCN-LABEL: name: test_remat_v_lshlrev_b16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B16_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHLREV_B16_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_LSHLREV_B16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHLREV_B16_e64 2, %0:vgpr_32, implicit $exec
@@ -3696,13 +3650,14 @@ body: |
; GCN-LABEL: name: test_remat_v_lshrrev_b16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_LSHRREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_LSHRREV_B16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHRREV_B16_e32 2, %0:vgpr_32, implicit $exec
@@ -3721,13 +3676,14 @@ body: |
; GCN-LABEL: name: test_remat_v_lshrrev_b16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B16_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_LSHRREV_B16_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_LSHRREV_B16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_LSHRREV_B16_e64 2, %0:vgpr_32, implicit $exec
@@ -3747,13 +3703,14 @@ body: |
; GCN-LABEL: name: test_remat_v_ashrrev_i16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ASHRREV_I16_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I16_e32_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I16_e32_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_ASHRREV_I16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ASHRREV_I16_e32 2, %0:vgpr_32, implicit $exec
@@ -3772,13 +3729,14 @@ body: |
; GCN-LABEL: name: test_remat_v_ashrrev_i16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I16_e64_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_ASHRREV_I16_e64_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_ASHRREV_I16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ASHRREV_I16_e64 2, %0:vgpr_32, implicit $exec
@@ -3798,13 +3756,14 @@ body: |
; GCN-LABEL: name: test_remat_v_add_u16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_U16_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_ADD_U16_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_ADD_U16_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_ADD_U16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_ADD_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3824,13 +3783,14 @@ body: |
; GCN-LABEL: name: test_remat_v_add_u16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 1, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 2, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 3, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 1, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U16_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 2, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_ADD_U16_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 3, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_ADD_U16_e64 1, %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_ADD_U16_e64 2, %0:vgpr_32, 0, implicit $exec
@@ -3850,13 +3810,14 @@ body: |
; GCN-LABEL: name: test_remat_v_sub_u16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUB_U16_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_SUB_U16_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_SUB_U16_e32_2:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_SUB_U16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_SUB_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3876,13 +3837,14 @@ body: |
; GCN-LABEL: name: test_remat_v_sub_u16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 1, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 2, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 3, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 1, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_SUB_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 2, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_SUB_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 3, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_SUB_U16_e64 1, %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SUB_U16_e64 2, %0:vgpr_32, 0, implicit $exec
@@ -3902,13 +3864,14 @@ body: |
; GCN-LABEL: name: test_remat_v_subrev_u16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUBREV_U16_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_U16_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_U16_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_SUBREV_U16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_SUBREV_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3928,13 +3891,14 @@ body: |
; GCN-LABEL: name: test_remat_v_subrev_u16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 1, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 2, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 3, $vgpr0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUBREV_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 1, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 2, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 3, [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_SUBREV_U16_e64 1, %0:vgpr_32, 0, implicit $exec
%2:vgpr_32 = V_SUBREV_U16_e64 2, %0:vgpr_32, 0, implicit $exec
@@ -3954,13 +3918,14 @@ body: |
; GCN-LABEL: name: test_remat_v_min_u16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MIN_U16_e32_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MIN_U16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -3980,13 +3945,14 @@ body: |
; GCN-LABEL: name: test_remat_v_min_u16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MIN_U16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN_U16_e64 2, %0:vgpr_32, implicit $exec
@@ -4006,13 +3972,14 @@ body: |
; GCN-LABEL: name: test_remat_v_max_u16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAX_U16_e32_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MAX_U16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -4032,13 +3999,14 @@ body: |
; GCN-LABEL: name: test_remat_v_max_u16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MAX_U16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX_U16_e64 2, %0:vgpr_32, implicit $exec
@@ -4058,13 +4026,14 @@ body: |
; GCN-LABEL: name: test_remat_v_min_i16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MIN_I16_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_I16_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_I16_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MIN_I16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN_I16_e32 2, %0:vgpr_32, implicit $exec
@@ -4084,13 +4053,14 @@ body: |
; GCN-LABEL: name: test_remat_v_min_i16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MIN_I16_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MIN_I16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MIN_I16_e64 2, %0:vgpr_32, implicit $exec
@@ -4110,13 +4080,14 @@ body: |
; GCN-LABEL: name: test_remat_v_max_i16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAX_I16_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_I16_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_I16_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MAX_I16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX_I16_e32 2, %0:vgpr_32, implicit $exec
@@ -4136,13 +4107,14 @@ body: |
; GCN-LABEL: name: test_remat_v_max_i16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MAX_I16_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MAX_I16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MAX_I16_e64 2, %0:vgpr_32, implicit $exec
@@ -4162,13 +4134,14 @@ body: |
; GCN-LABEL: name: test_remat_v_mul_lo_u16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MUL_LO_U16_e32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MUL_LO_U16_e32 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_LO_U16_e32 2, %0:vgpr_32, implicit $exec
@@ -4188,13 +4161,14 @@ body: |
; GCN-LABEL: name: test_remat_v_mul_lo_u16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 1, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 2, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 3, $vgpr0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MUL_LO_U16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 1, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 2, [[COPY]], implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 3, [[COPY]], implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_MUL_LO_U16_e64 1, %0:vgpr_32, implicit $exec
%2:vgpr_32 = V_MUL_LO_U16_e64 2, %0:vgpr_32, implicit $exec
@@ -4214,13 +4188,14 @@ body: |
; GCN-LABEL: name: test_remat_v_add_f16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_ADD_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_ADD_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_ADD_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_ADD_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4240,13 +4215,14 @@ body: |
; GCN-LABEL: name: test_remat_v_add_f16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_ADD_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_ADD_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4266,13 +4242,14 @@ body: |
; GCN-LABEL: name: test_remat_v_sub_f16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUB_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUB_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUB_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_SUB_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_SUB_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4292,13 +4269,14 @@ body: |
; GCN-LABEL: name: test_remat_v_sub_f16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUB_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUB_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUB_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4318,13 +4296,14 @@ body: |
; GCN-LABEL: name: test_remat_v_subrev_f16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUBREV_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_SUBREV_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_SUBREV_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4344,13 +4323,14 @@ body: |
; GCN-LABEL: name: test_remat_v_subrev_f16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_SUBREV_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_SUBREV_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4370,13 +4350,14 @@ body: |
; GCN-LABEL: name: test_remat_v_mul_f16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MUL_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MUL_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MUL_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MUL_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MUL_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4396,13 +4377,14 @@ body: |
; GCN-LABEL: name: test_remat_v_mul_f16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4422,13 +4404,14 @@ body: |
; GCN-LABEL: name: test_remat_v_ldexp_f16_e32
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_LDEXP_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_LDEXP_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_LDEXP_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, %0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, %0, implicit $mode, implicit $exec
@@ -4448,13 +4431,14 @@ body: |
; GCN-LABEL: name: test_remat_v_ldexp_f16_e64
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_LDEXP_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_LDEXP_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, %0, 0, 1, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, %0, 0, 1, 0, 0, implicit $mode, implicit $exec
@@ -4474,13 +4458,14 @@ body: |
; GCN-LABEL: name: test_remat_v_min_f16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MIN_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MIN_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MIN_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MIN_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MIN_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4500,13 +4485,14 @@ body: |
; GCN-LABEL: name: test_remat_v_min_f16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MIN_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MIN_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4526,13 +4512,14 @@ body: |
; GCN-LABEL: name: test_remat_v_max_f16_e32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 1, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 2, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 3, $vgpr0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAX_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 1, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MAX_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 2, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MAX_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 3, [[COPY]], implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MAX_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MAX_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec
@@ -4552,13 +4539,14 @@ body: |
; GCN-LABEL: name: test_remat_v_max_f16_e64
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MAX_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MAX_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
@@ -4578,13 +4566,14 @@ body: |
; GCN-LABEL: name: test_remat_v_madak_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 1, $vgpr0, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 2, $vgpr0, 2, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 3, $vgpr0, 3, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 1, [[COPY]], 1, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADAK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 2, [[COPY]], 2, implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADAK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 3, [[COPY]], 3, implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MADAK_F16 1, %0, 1, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MADAK_F16 2, %0, 2, implicit $exec, implicit $mode
@@ -4604,13 +4593,14 @@ body: |
; GCN-LABEL: name: test_remat_v_madmk_f16
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 1, 1, $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 2, 2, $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 3, 3, $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 1, 1, [[COPY]], implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADMK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 2, 2, [[COPY]], implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_MADMK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 3, 3, [[COPY]], implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MADMK_F16 1, 1, %0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_MADMK_F16 2, 2, %0, implicit $exec, implicit $mode
@@ -4630,13 +4620,14 @@ body: |
; GCN-LABEL: name: test_remat_v_fmamk_f16
; GCN: liveins: $vgpr0, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 1, 1, $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 2, 2, $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 3, 3, $vgpr0, implicit $exec, implicit $mode
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 1, 1, [[COPY]], implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMAMK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 2, 2, [[COPY]], implicit $exec, implicit $mode
+ ; GCN-NEXT: [[V_FMAMK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 3, 3, [[COPY]], implicit $exec, implicit $mode
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_FMAMK_F16 1, 1, %0, implicit $exec, implicit $mode
%2:vgpr_32 = nofpexcept V_FMAMK_F16 2, 2, %0, implicit $exec, implicit $mode
@@ -4656,13 +4647,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_mad_i16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MAD_I16_:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAD_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAD_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MAD_I16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4683,13 +4675,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_mad_u16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MAD_U16_:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAD_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAD_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MAD_U16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4710,13 +4703,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_add_u16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_ADD_U16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_U16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_U16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_ADD_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4737,13 +4731,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_add_i16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_ADD_I16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_I16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_I16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_ADD_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4764,13 +4759,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_mul_lo_u16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MUL_LO_U16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MUL_LO_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MUL_LO_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MUL_LO_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4791,13 +4787,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_min_i16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MIN_I16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MIN_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MIN_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MIN_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4818,13 +4815,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_max_i16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MAX_I16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAX_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAX_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MAX_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4845,13 +4843,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_min_u16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MIN_U16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MIN_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MIN_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MIN_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4872,13 +4871,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_max_u16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MAX_U16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAX_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAX_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_MAX_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4899,13 +4899,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_sub_u16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_SUB_U16_:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_SUB_U16_1:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_SUB_U16_2:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_SUB_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4926,13 +4927,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_sub_i16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_SUB_I16_:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_SUB_I16_1:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_SUB_I16_2:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_SUB_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4953,13 +4955,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_lshlrev_b16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_LSHLREV_B16_1:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_LSHLREV_B16_2:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_LSHLREV_B16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -4980,13 +4983,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_ashrrev_i16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_ASHRREV_I16_1:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_ASHRREV_I16_2:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_ASHRREV_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -5007,13 +5011,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_lshrrev_b16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_LSHRREV_B16_1:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_LSHRREV_B16_2:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_LSHRREV_B16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec
@@ -5034,13 +5039,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_add_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5064,19 +5070,14 @@ body: |
; GCN-LABEL: name: test_no_remat_v_pk_add_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
- ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_F16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_F16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = V_PK_ADD_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5097,13 +5098,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_mul_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MUL_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MUL_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_PK_MUL_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5124,13 +5126,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_min_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MIN_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MIN_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_PK_MIN_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5151,13 +5154,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_max_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAX_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MAX_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5178,13 +5182,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_fma_f16
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_PK_FMA_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_FMA_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_FMA_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_PK_FMA_F16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_PK_FMA_F16 9, %0, 9, %0, 9, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5205,13 +5210,14 @@ body: |
; GCN-LABEL: name: test_remat_v_mad_mix_f32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_MAD_MIX_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MAD_MIX_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_MAD_MIX_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_MAD_MIX_F32 8, %0, 8, %0, 8, %0, 0, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_MAD_MIX_F32 9, %0, 9, %0, 9, %0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5231,13 +5237,14 @@ body: |
; GCN-LABEL: name: test_remat_v_fma_mix_f32
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_FMA_MIX_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_FMA_MIX_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_FMA_MIX_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %0, 8, %0, 8, %0, 0, 0, 0, implicit $mode, implicit $exec
%2:vgpr_32 = nofpexcept V_FMA_MIX_F32 9, %0, 9, %0, 9, %0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5257,13 +5264,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_fma_f32
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_FMA_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_FMA_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%2:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5285,19 +5293,14 @@ body: |
; GCN-LABEL: name: test_no_remat_v_pk_fma_f32
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5)
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5)
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5)
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_FMA_F32_1:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_FMA_F32_2:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%2:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5317,13 +5320,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_mul_f32
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MUL_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_MUL_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
%2:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5343,13 +5347,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_add_f32
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_PK_ADD_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
%2:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec
@@ -5369,13 +5374,14 @@ body: |
; GCN-LABEL: name: test_remat_v_pk_mov_b32
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 9, $vgpr0_vgpr1, 9, $vgpr0_vgpr1, 12, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 10, $vgpr0_vgpr1, 10, $vgpr0_vgpr1, 13, 0, 0, 0, 0, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
- ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MOV_B32_1:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 9, [[COPY]], 9, [[COPY]], 12, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_PK_MOV_B32_2:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 10, [[COPY]], 10, [[COPY]], 13, 0, 0, 0, 0, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_1]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_2]]
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]]
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vreg_64_align2 = V_PK_MOV_B32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $exec
%2:vreg_64_align2 = V_PK_MOV_B32 9, %0, 9, %0, 12, 0, 0, 0, 0, implicit $exec
@@ -5395,12 +5401,12 @@ tracksRegLiveness: true
body: |
bb.0:
; GCN-LABEL: name: test_remat_subreg_def
- ; GCN: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
- ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 2, implicit $exec
- ; GCN-NEXT: S_NOP 0, implicit renamable $vgpr0_vgpr1
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]]
+ ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]]
; GCN-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
undef %1.sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
index ad82869c001f6f..7f8240eeb98ebf 100644
--- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll
@@ -8,9 +8,6 @@
define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-LABEL: kernel0:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
-; CHECK-NEXT: ;;#ASMSTART
-; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
@@ -22,46 +19,47 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[2:3]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s2, 0
+; CHECK-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
; CHECK-NEXT: s_load_dword s0, s[6:7], 0x8
-; CHECK-NEXT: v_writelane_b32 v23, s3, 1
+; CHECK-NEXT: v_writelane_b32 v22, s2, 0
+; CHECK-NEXT: v_writelane_b32 v22, s3, 1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[4:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s4, 2
-; CHECK-NEXT: v_writelane_b32 v23, s5, 3
-; CHECK-NEXT: v_writelane_b32 v23, s6, 4
-; CHECK-NEXT: v_writelane_b32 v23, s7, 5
+; CHECK-NEXT: v_writelane_b32 v22, s4, 2
+; CHECK-NEXT: v_writelane_b32 v22, s5, 3
+; CHECK-NEXT: v_writelane_b32 v22, s6, 4
+; CHECK-NEXT: v_writelane_b32 v22, s7, 5
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[4:11]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s4, 6
-; CHECK-NEXT: v_writelane_b32 v23, s5, 7
-; CHECK-NEXT: v_writelane_b32 v23, s6, 8
-; CHECK-NEXT: v_writelane_b32 v23, s7, 9
-; CHECK-NEXT: v_writelane_b32 v23, s8, 10
-; CHECK-NEXT: v_writelane_b32 v23, s9, 11
-; CHECK-NEXT: v_writelane_b32 v23, s10, 12
-; CHECK-NEXT: v_writelane_b32 v23, s11, 13
+; CHECK-NEXT: v_writelane_b32 v22, s4, 6
+; CHECK-NEXT: v_writelane_b32 v22, s5, 7
+; CHECK-NEXT: v_writelane_b32 v22, s6, 8
+; CHECK-NEXT: v_writelane_b32 v22, s7, 9
+; CHECK-NEXT: v_writelane_b32 v22, s8, 10
+; CHECK-NEXT: v_writelane_b32 v22, s9, 11
+; CHECK-NEXT: v_writelane_b32 v22, s10, 12
+; CHECK-NEXT: v_writelane_b32 v22, s11, 13
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[4:19]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s4, 14
-; CHECK-NEXT: v_writelane_b32 v23, s5, 15
-; CHECK-NEXT: v_writelane_b32 v23, s6, 16
-; CHECK-NEXT: v_writelane_b32 v23, s7, 17
-; CHECK-NEXT: v_writelane_b32 v23, s8, 18
-; CHECK-NEXT: v_writelane_b32 v23, s9, 19
-; CHECK-NEXT: v_writelane_b32 v23, s10, 20
-; CHECK-NEXT: v_writelane_b32 v23, s11, 21
-; CHECK-NEXT: v_writelane_b32 v23, s12, 22
-; CHECK-NEXT: v_writelane_b32 v23, s13, 23
-; CHECK-NEXT: v_writelane_b32 v23, s14, 24
-; CHECK-NEXT: v_writelane_b32 v23, s15, 25
-; CHECK-NEXT: v_writelane_b32 v23, s16, 26
-; CHECK-NEXT: v_writelane_b32 v23, s17, 27
-; CHECK-NEXT: v_writelane_b32 v23, s18, 28
-; CHECK-NEXT: v_writelane_b32 v23, s19, 29
+; CHECK-NEXT: v_writelane_b32 v22, s4, 14
+; CHECK-NEXT: v_writelane_b32 v22, s5, 15
+; CHECK-NEXT: v_writelane_b32 v22, s6, 16
+; CHECK-NEXT: v_writelane_b32 v22, s7, 17
+; CHECK-NEXT: v_writelane_b32 v22, s8, 18
+; CHECK-NEXT: v_writelane_b32 v22, s9, 19
+; CHECK-NEXT: v_writelane_b32 v22, s10, 20
+; CHECK-NEXT: v_writelane_b32 v22, s11, 21
+; CHECK-NEXT: v_writelane_b32 v22, s12, 22
+; CHECK-NEXT: v_writelane_b32 v22, s13, 23
+; CHECK-NEXT: v_writelane_b32 v22, s14, 24
+; CHECK-NEXT: v_writelane_b32 v22, s15, 25
+; CHECK-NEXT: v_writelane_b32 v22, s16, 26
+; CHECK-NEXT: v_writelane_b32 v22, s17, 27
+; CHECK-NEXT: v_writelane_b32 v22, s18, 28
+; CHECK-NEXT: v_writelane_b32 v22, s19, 29
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[42:43]
; CHECK-NEXT: ;;#ASMEND
@@ -71,14 +69,14 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[4:11]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s4, 30
-; CHECK-NEXT: v_writelane_b32 v23, s5, 31
-; CHECK-NEXT: v_writelane_b32 v23, s6, 32
-; CHECK-NEXT: v_writelane_b32 v23, s7, 33
-; CHECK-NEXT: v_writelane_b32 v23, s8, 34
-; CHECK-NEXT: v_writelane_b32 v23, s9, 35
-; CHECK-NEXT: v_writelane_b32 v23, s10, 36
-; CHECK-NEXT: v_writelane_b32 v23, s11, 37
+; CHECK-NEXT: v_writelane_b32 v22, s4, 30
+; CHECK-NEXT: v_writelane_b32 v22, s5, 31
+; CHECK-NEXT: v_writelane_b32 v22, s6, 32
+; CHECK-NEXT: v_writelane_b32 v22, s7, 33
+; CHECK-NEXT: v_writelane_b32 v22, s8, 34
+; CHECK-NEXT: v_writelane_b32 v22, s9, 35
+; CHECK-NEXT: v_writelane_b32 v22, s10, 36
+; CHECK-NEXT: v_writelane_b32 v22, s11, 37
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: ;;#ASMSTART
@@ -96,161 +94,159 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s0, 38
-; CHECK-NEXT: v_writelane_b32 v23, s1, 39
-; CHECK-NEXT: v_writelane_b32 v23, s2, 40
-; CHECK-NEXT: v_writelane_b32 v23, s3, 41
-; CHECK-NEXT: v_writelane_b32 v23, s4, 42
-; CHECK-NEXT: v_writelane_b32 v23, s5, 43
-; CHECK-NEXT: v_writelane_b32 v23, s6, 44
-; CHECK-NEXT: v_writelane_b32 v23, s7, 45
-; CHECK-NEXT: v_writelane_b32 v23, s8, 46
-; CHECK-NEXT: v_writelane_b32 v23, s9, 47
-; CHECK-NEXT: v_writelane_b32 v23, s10, 48
-; CHECK-NEXT: v_writelane_b32 v23, s11, 49
-; CHECK-NEXT: v_writelane_b32 v23, s12, 50
-; CHECK-NEXT: v_writelane_b32 v23, s13, 51
-; CHECK-NEXT: v_writelane_b32 v23, s14, 52
-; CHECK-NEXT: v_writelane_b32 v23, s15, 53
+; CHECK-NEXT: v_writelane_b32 v22, s0, 38
+; CHECK-NEXT: v_writelane_b32 v22, s1, 39
+; CHECK-NEXT: v_writelane_b32 v22, s2, 40
+; CHECK-NEXT: v_writelane_b32 v22, s3, 41
+; CHECK-NEXT: v_writelane_b32 v22, s4, 42
+; CHECK-NEXT: v_writelane_b32 v22, s5, 43
+; CHECK-NEXT: v_writelane_b32 v22, s6, 44
+; CHECK-NEXT: v_writelane_b32 v22, s7, 45
+; CHECK-NEXT: v_writelane_b32 v22, s8, 46
+; CHECK-NEXT: v_writelane_b32 v22, s9, 47
+; CHECK-NEXT: v_writelane_b32 v22, s10, 48
+; CHECK-NEXT: v_writelane_b32 v22, s11, 49
+; CHECK-NEXT: v_writelane_b32 v22, s12, 50
+; CHECK-NEXT: v_writelane_b32 v22, s13, 51
+; CHECK-NEXT: v_writelane_b32 v22, s14, 52
+; CHECK-NEXT: v_writelane_b32 v22, s15, 53
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[34:35]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:3]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s0, 54
-; CHECK-NEXT: v_writelane_b32 v23, s1, 55
-; CHECK-NEXT: v_writelane_b32 v23, s2, 56
-; CHECK-NEXT: v_writelane_b32 v23, s3, 57
+; CHECK-NEXT: v_writelane_b32 v22, s0, 54
+; CHECK-NEXT: v_writelane_b32 v22, s1, 55
+; CHECK-NEXT: v_writelane_b32 v22, s2, 56
+; CHECK-NEXT: v_writelane_b32 v22, s3, 57
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v23, s0, 58
-; CHECK-NEXT: v_writelane_b32 v23, s1, 59
-; CHECK-NEXT: v_writelane_b32 v23, s2, 60
-; CHECK-NEXT: ; implicit-def: $vgpr0
-; CHECK-NEXT: v_writelane_b32 v23, s3, 61
-; CHECK-NEXT: v_writelane_b32 v23, s4, 62
-; CHECK-NEXT: v_writelane_b32 v0, s6, 0
-; CHECK-NEXT: v_writelane_b32 v23, s5, 63
-; CHECK-NEXT: v_writelane_b32 v0, s7, 1
+; CHECK-NEXT: v_writelane_b32 v22, s0, 58
+; CHECK-NEXT: v_writelane_b32 v22, s1, 59
+; CHECK-NEXT: v_writelane_b32 v22, s2, 60
+; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane
+; CHECK-NEXT: v_writelane_b32 v22, s3, 61
+; CHECK-NEXT: v_writelane_b32 v22, s4, 62
+; CHECK-NEXT: v_writelane_b32 v23, s6, 0
+; CHECK-NEXT: v_writelane_b32 v22, s5, 63
+; CHECK-NEXT: v_writelane_b32 v23, s7, 1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v0, s0, 2
-; CHECK-NEXT: v_writelane_b32 v0, s1, 3
-; CHECK-NEXT: v_writelane_b32 v0, s2, 4
-; CHECK-NEXT: v_writelane_b32 v0, s3, 5
-; CHECK-NEXT: v_writelane_b32 v0, s4, 6
-; CHECK-NEXT: v_writelane_b32 v0, s5, 7
-; CHECK-NEXT: v_writelane_b32 v0, s6, 8
-; CHECK-NEXT: v_writelane_b32 v0, s7, 9
-; CHECK-NEXT: v_writelane_b32 v0, s8, 10
-; CHECK-NEXT: v_writelane_b32 v0, s9, 11
-; CHECK-NEXT: v_writelane_b32 v0, s10, 12
-; CHECK-NEXT: v_writelane_b32 v0, s11, 13
-; CHECK-NEXT: v_writelane_b32 v0, s12, 14
-; CHECK-NEXT: v_writelane_b32 v0, s13, 15
-; CHECK-NEXT: v_writelane_b32 v0, s14, 16
-; CHECK-NEXT: v_writelane_b32 v0, s15, 17
+; CHECK-NEXT: v_writelane_b32 v23, s0, 2
+; CHECK-NEXT: v_writelane_b32 v23, s1, 3
+; CHECK-NEXT: v_writelane_b32 v23, s2, 4
+; CHECK-NEXT: v_writelane_b32 v23, s3, 5
+; CHECK-NEXT: v_writelane_b32 v23, s4, 6
+; CHECK-NEXT: v_writelane_b32 v23, s5, 7
+; CHECK-NEXT: v_writelane_b32 v23, s6, 8
+; CHECK-NEXT: v_writelane_b32 v23, s7, 9
+; CHECK-NEXT: v_writelane_b32 v23, s8, 10
+; CHECK-NEXT: v_writelane_b32 v23, s9, 11
+; CHECK-NEXT: v_writelane_b32 v23, s10, 12
+; CHECK-NEXT: v_writelane_b32 v23, s11, 13
+; CHECK-NEXT: v_writelane_b32 v23, s12, 14
+; CHECK-NEXT: v_writelane_b32 v23, s13, 15
+; CHECK-NEXT: v_writelane_b32 v23, s14, 16
+; CHECK-NEXT: v_writelane_b32 v23, s15, 17
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:1]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v0, s0, 18
-; CHECK-NEXT: v_writelane_b32 v0, s1, 19
+; CHECK-NEXT: v_writelane_b32 v23, s0, 18
+; CHECK-NEXT: v_writelane_b32 v23, s1, 19
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:3]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v0, s0, 20
-; CHECK-NEXT: v_writelane_b32 v0, s1, 21
-; CHECK-NEXT: v_writelane_b32 v0, s2, 22
-; CHECK-NEXT: v_writelane_b32 v0, s3, 23
+; CHECK-NEXT: v_writelane_b32 v23, s0, 20
+; CHECK-NEXT: v_writelane_b32 v23, s1, 21
+; CHECK-NEXT: v_writelane_b32 v23, s2, 22
+; CHECK-NEXT: v_writelane_b32 v23, s3, 23
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v0, s0, 24
-; CHECK-NEXT: v_writelane_b32 v0, s1, 25
-; CHECK-NEXT: v_writelane_b32 v0, s2, 26
-; CHECK-NEXT: v_writelane_b32 v0, s3, 27
-; CHECK-NEXT: v_writelane_b32 v0, s4, 28
-; CHECK-NEXT: v_writelane_b32 v0, s5, 29
-; CHECK-NEXT: v_writelane_b32 v0, s6, 30
-; CHECK-NEXT: v_writelane_b32 v0, s7, 31
+; CHECK-NEXT: v_writelane_b32 v23, s0, 24
+; CHECK-NEXT: v_writelane_b32 v23, s1, 25
+; CHECK-NEXT: v_writelane_b32 v23, s2, 26
+; CHECK-NEXT: v_writelane_b32 v23, s3, 27
+; CHECK-NEXT: v_writelane_b32 v23, s4, 28
+; CHECK-NEXT: v_writelane_b32 v23, s5, 29
+; CHECK-NEXT: v_writelane_b32 v23, s6, 30
+; CHECK-NEXT: v_writelane_b32 v23, s7, 31
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_writelane_b32 v0, s0, 32
-; CHECK-NEXT: v_writelane_b32 v0, s1, 33
-; CHECK-NEXT: v_writelane_b32 v0, s2, 34
-; CHECK-NEXT: v_writelane_b32 v0, s3, 35
-; CHECK-NEXT: v_writelane_b32 v0, s4, 36
-; CHECK-NEXT: v_writelane_b32 v0, s5, 37
-; CHECK-NEXT: v_writelane_b32 v0, s6, 38
-; CHECK-NEXT: v_writelane_b32 v0, s7, 39
-; CHECK-NEXT: v_writelane_b32 v0, s8, 40
-; CHECK-NEXT: v_writelane_b32 v0, s9, 41
-; CHECK-NEXT: v_writelane_b32 v0, s10, 42
-; CHECK-NEXT: v_writelane_b32 v0, s11, 43
-; CHECK-NEXT: v_writelane_b32 v0, s12, 44
-; CHECK-NEXT: v_writelane_b32 v0, s13, 45
-; CHECK-NEXT: v_writelane_b32 v0, s14, 46
-; CHECK-NEXT: v_writelane_b32 v0, s15, 47
+; CHECK-NEXT: v_writelane_b32 v23, s0, 32
+; CHECK-NEXT: v_writelane_b32 v23, s1, 33
+; CHECK-NEXT: v_writelane_b32 v23, s2, 34
+; CHECK-NEXT: v_writelane_b32 v23, s3, 35
+; CHECK-NEXT: v_writelane_b32 v23, s4, 36
+; CHECK-NEXT: v_writelane_b32 v23, s5, 37
+; CHECK-NEXT: v_writelane_b32 v23, s6, 38
+; CHECK-NEXT: v_writelane_b32 v23, s7, 39
+; CHECK-NEXT: v_writelane_b32 v23, s8, 40
+; CHECK-NEXT: v_writelane_b32 v23, s9, 41
+; CHECK-NEXT: v_writelane_b32 v23, s10, 42
+; CHECK-NEXT: v_writelane_b32 v23, s11, 43
+; CHECK-NEXT: v_writelane_b32 v23, s12, 44
+; CHECK-NEXT: v_writelane_b32 v23, s13, 45
+; CHECK-NEXT: v_writelane_b32 v23, s14, 46
+; CHECK-NEXT: v_writelane_b32 v23, s15, 47
; CHECK-NEXT: s_cbranch_scc0 .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %ret
-; CHECK-NEXT: ; kill: killed $vgpr23
-; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .LBB0_2: ; %bb0
-; CHECK-NEXT: v_readlane_b32 s0, v23, 0
-; CHECK-NEXT: v_readlane_b32 s1, v23, 1
+; CHECK-NEXT: v_readlane_b32 s0, v22, 0
+; CHECK-NEXT: v_readlane_b32 s1, v22, 1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:1]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 2
-; CHECK-NEXT: v_readlane_b32 s1, v23, 3
-; CHECK-NEXT: v_readlane_b32 s2, v23, 4
-; CHECK-NEXT: v_readlane_b32 s3, v23, 5
+; CHECK-NEXT: v_readlane_b32 s0, v22, 2
+; CHECK-NEXT: v_readlane_b32 s1, v22, 3
+; CHECK-NEXT: v_readlane_b32 s2, v22, 4
+; CHECK-NEXT: v_readlane_b32 s3, v22, 5
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:3]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 6
-; CHECK-NEXT: v_readlane_b32 s1, v23, 7
-; CHECK-NEXT: v_readlane_b32 s2, v23, 8
-; CHECK-NEXT: v_readlane_b32 s3, v23, 9
-; CHECK-NEXT: v_readlane_b32 s4, v23, 10
-; CHECK-NEXT: v_readlane_b32 s5, v23, 11
-; CHECK-NEXT: v_readlane_b32 s6, v23, 12
-; CHECK-NEXT: v_readlane_b32 s7, v23, 13
+; CHECK-NEXT: v_readlane_b32 s0, v22, 6
+; CHECK-NEXT: v_readlane_b32 s1, v22, 7
+; CHECK-NEXT: v_readlane_b32 s2, v22, 8
+; CHECK-NEXT: v_readlane_b32 s3, v22, 9
+; CHECK-NEXT: v_readlane_b32 s4, v22, 10
+; CHECK-NEXT: v_readlane_b32 s5, v22, 11
+; CHECK-NEXT: v_readlane_b32 s6, v22, 12
+; CHECK-NEXT: v_readlane_b32 s7, v22, 13
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 14
-; CHECK-NEXT: v_readlane_b32 s1, v23, 15
-; CHECK-NEXT: v_readlane_b32 s2, v23, 16
-; CHECK-NEXT: v_readlane_b32 s3, v23, 17
-; CHECK-NEXT: v_readlane_b32 s4, v23, 18
-; CHECK-NEXT: v_readlane_b32 s5, v23, 19
-; CHECK-NEXT: v_readlane_b32 s6, v23, 20
-; CHECK-NEXT: v_readlane_b32 s7, v23, 21
-; CHECK-NEXT: v_readlane_b32 s8, v23, 22
-; CHECK-NEXT: v_readlane_b32 s9, v23, 23
-; CHECK-NEXT: v_readlane_b32 s10, v23, 24
-; CHECK-NEXT: v_readlane_b32 s11, v23, 25
-; CHECK-NEXT: v_readlane_b32 s12, v23, 26
-; CHECK-NEXT: v_readlane_b32 s13, v23, 27
-; CHECK-NEXT: v_readlane_b32 s14, v23, 28
-; CHECK-NEXT: v_readlane_b32 s15, v23, 29
+; CHECK-NEXT: v_readlane_b32 s0, v22, 14
+; CHECK-NEXT: v_readlane_b32 s1, v22, 15
+; CHECK-NEXT: v_readlane_b32 s2, v22, 16
+; CHECK-NEXT: v_readlane_b32 s3, v22, 17
+; CHECK-NEXT: v_readlane_b32 s4, v22, 18
+; CHECK-NEXT: v_readlane_b32 s5, v22, 19
+; CHECK-NEXT: v_readlane_b32 s6, v22, 20
+; CHECK-NEXT: v_readlane_b32 s7, v22, 21
+; CHECK-NEXT: v_readlane_b32 s8, v22, 22
+; CHECK-NEXT: v_readlane_b32 s9, v22, 23
+; CHECK-NEXT: v_readlane_b32 s10, v22, 24
+; CHECK-NEXT: v_readlane_b32 s11, v22, 25
+; CHECK-NEXT: v_readlane_b32 s12, v22, 26
+; CHECK-NEXT: v_readlane_b32 s13, v22, 27
+; CHECK-NEXT: v_readlane_b32 s14, v22, 28
+; CHECK-NEXT: v_readlane_b32 s15, v22, 29
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 30
-; CHECK-NEXT: v_readlane_b32 s1, v23, 31
-; CHECK-NEXT: v_readlane_b32 s2, v23, 32
-; CHECK-NEXT: v_readlane_b32 s3, v23, 33
-; CHECK-NEXT: v_readlane_b32 s4, v23, 34
-; CHECK-NEXT: v_readlane_b32 s5, v23, 35
-; CHECK-NEXT: v_readlane_b32 s6, v23, 36
-; CHECK-NEXT: v_readlane_b32 s7, v23, 37
+; CHECK-NEXT: v_readlane_b32 s0, v22, 30
+; CHECK-NEXT: v_readlane_b32 s1, v22, 31
+; CHECK-NEXT: v_readlane_b32 s2, v22, 32
+; CHECK-NEXT: v_readlane_b32 s3, v22, 33
+; CHECK-NEXT: v_readlane_b32 s4, v22, 34
+; CHECK-NEXT: v_readlane_b32 s5, v22, 35
+; CHECK-NEXT: v_readlane_b32 s6, v22, 36
+; CHECK-NEXT: v_readlane_b32 s7, v22, 37
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[42:43]
; CHECK-NEXT: ;;#ASMEND
@@ -260,10 +256,10 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 38
-; CHECK-NEXT: v_readlane_b32 s1, v23, 39
-; CHECK-NEXT: v_readlane_b32 s2, v23, 40
-; CHECK-NEXT: v_readlane_b32 s3, v23, 41
+; CHECK-NEXT: v_readlane_b32 s0, v22, 38
+; CHECK-NEXT: v_readlane_b32 s1, v22, 39
+; CHECK-NEXT: v_readlane_b32 s2, v22, 40
+; CHECK-NEXT: v_readlane_b32 s3, v22, 41
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[16:31]
; CHECK-NEXT: ;;#ASMEND
@@ -276,111 +272,108 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[44:51]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s4, v23, 42
-; CHECK-NEXT: v_readlane_b32 s5, v23, 43
-; CHECK-NEXT: v_readlane_b32 s6, v23, 44
-; CHECK-NEXT: v_readlane_b32 s7, v23, 45
-; CHECK-NEXT: v_readlane_b32 s8, v23, 46
-; CHECK-NEXT: v_readlane_b32 s9, v23, 47
-; CHECK-NEXT: v_readlane_b32 s10, v23, 48
-; CHECK-NEXT: v_readlane_b32 s11, v23, 49
-; CHECK-NEXT: v_readlane_b32 s12, v23, 50
-; CHECK-NEXT: v_readlane_b32 s13, v23, 51
-; CHECK-NEXT: v_readlane_b32 s14, v23, 52
-; CHECK-NEXT: v_readlane_b32 s15, v23, 53
+; CHECK-NEXT: v_readlane_b32 s4, v22, 42
+; CHECK-NEXT: v_readlane_b32 s5, v22, 43
+; CHECK-NEXT: v_readlane_b32 s6, v22, 44
+; CHECK-NEXT: v_readlane_b32 s7, v22, 45
+; CHECK-NEXT: v_readlane_b32 s8, v22, 46
+; CHECK-NEXT: v_readlane_b32 s9, v22, 47
+; CHECK-NEXT: v_readlane_b32 s10, v22, 48
+; CHECK-NEXT: v_readlane_b32 s11, v22, 49
+; CHECK-NEXT: v_readlane_b32 s12, v22, 50
+; CHECK-NEXT: v_readlane_b32 s13, v22, 51
+; CHECK-NEXT: v_readlane_b32 s14, v22, 52
+; CHECK-NEXT: v_readlane_b32 s15, v22, 53
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 54
-; CHECK-NEXT: v_readlane_b32 s1, v23, 55
-; CHECK-NEXT: v_readlane_b32 s2, v23, 56
-; CHECK-NEXT: v_readlane_b32 s3, v23, 57
+; CHECK-NEXT: v_readlane_b32 s0, v22, 54
+; CHECK-NEXT: v_readlane_b32 s1, v22, 55
+; CHECK-NEXT: v_readlane_b32 s2, v22, 56
+; CHECK-NEXT: v_readlane_b32 s3, v22, 57
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[34:35]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:3]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v23, 58
-; CHECK-NEXT: v_readlane_b32 s1, v23, 59
-; CHECK-NEXT: v_readlane_b32 s2, v23, 60
-; CHECK-NEXT: v_readlane_b32 s3, v23, 61
-; CHECK-NEXT: v_readlane_b32 s4, v23, 62
-; CHECK-NEXT: v_readlane_b32 s5, v23, 63
-; CHECK-NEXT: v_readlane_b32 s6, v0, 0
-; CHECK-NEXT: v_readlane_b32 s7, v0, 1
+; CHECK-NEXT: v_readlane_b32 s0, v22, 58
+; CHECK-NEXT: v_readlane_b32 s1, v22, 59
+; CHECK-NEXT: v_readlane_b32 s2, v22, 60
+; CHECK-NEXT: v_readlane_b32 s3, v22, 61
+; CHECK-NEXT: v_readlane_b32 s4, v22, 62
+; CHECK-NEXT: v_readlane_b32 s5, v22, 63
+; CHECK-NEXT: v_readlane_b32 s6, v23, 0
+; CHECK-NEXT: v_readlane_b32 s7, v23, 1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v0, 2
-; CHECK-NEXT: v_readlane_b32 s1, v0, 3
-; CHECK-NEXT: v_readlane_b32 s2, v0, 4
-; CHECK-NEXT: v_readlane_b32 s3, v0, 5
-; CHECK-NEXT: v_readlane_b32 s4, v0, 6
-; CHECK-NEXT: v_readlane_b32 s5, v0, 7
-; CHECK-NEXT: v_readlane_b32 s6, v0, 8
-; CHECK-NEXT: v_readlane_b32 s7, v0, 9
-; CHECK-NEXT: v_readlane_b32 s8, v0, 10
-; CHECK-NEXT: v_readlane_b32 s9, v0, 11
-; CHECK-NEXT: v_readlane_b32 s10, v0, 12
-; CHECK-NEXT: v_readlane_b32 s11, v0, 13
-; CHECK-NEXT: v_readlane_b32 s12, v0, 14
-; CHECK-NEXT: v_readlane_b32 s13, v0, 15
-; CHECK-NEXT: v_readlane_b32 s14, v0, 16
-; CHECK-NEXT: v_readlane_b32 s15, v0, 17
+; CHECK-NEXT: v_readlane_b32 s0, v23, 2
+; CHECK-NEXT: v_readlane_b32 s1, v23, 3
+; CHECK-NEXT: v_readlane_b32 s2, v23, 4
+; CHECK-NEXT: v_readlane_b32 s3, v23, 5
+; CHECK-NEXT: v_readlane_b32 s4, v23, 6
+; CHECK-NEXT: v_readlane_b32 s5, v23, 7
+; CHECK-NEXT: v_readlane_b32 s6, v23, 8
+; CHECK-NEXT: v_readlane_b32 s7, v23, 9
+; CHECK-NEXT: v_readlane_b32 s8, v23, 10
+; CHECK-NEXT: v_readlane_b32 s9, v23, 11
+; CHECK-NEXT: v_readlane_b32 s10, v23, 12
+; CHECK-NEXT: v_readlane_b32 s11, v23, 13
+; CHECK-NEXT: v_readlane_b32 s12, v23, 14
+; CHECK-NEXT: v_readlane_b32 s13, v23, 15
+; CHECK-NEXT: v_readlane_b32 s14, v23, 16
+; CHECK-NEXT: v_readlane_b32 s15, v23, 17
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v0, 18
-; CHECK-NEXT: v_readlane_b32 s1, v0, 19
+; CHECK-NEXT: v_readlane_b32 s0, v23, 18
+; CHECK-NEXT: v_readlane_b32 s1, v23, 19
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:1]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v0, 20
-; CHECK-NEXT: v_readlane_b32 s1, v0, 21
-; CHECK-NEXT: v_readlane_b32 s2, v0, 22
-; CHECK-NEXT: v_readlane_b32 s3, v0, 23
+; CHECK-NEXT: v_readlane_b32 s0, v23, 20
+; CHECK-NEXT: v_readlane_b32 s1, v23, 21
+; CHECK-NEXT: v_readlane_b32 s2, v23, 22
+; CHECK-NEXT: v_readlane_b32 s3, v23, 23
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:3]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v0, 24
-; CHECK-NEXT: v_readlane_b32 s1, v0, 25
-; CHECK-NEXT: v_readlane_b32 s2, v0, 26
-; CHECK-NEXT: v_readlane_b32 s3, v0, 27
-; CHECK-NEXT: v_readlane_b32 s4, v0, 28
-; CHECK-NEXT: v_readlane_b32 s5, v0, 29
-; CHECK-NEXT: v_readlane_b32 s6, v0, 30
-; CHECK-NEXT: v_readlane_b32 s7, v0, 31
+; CHECK-NEXT: v_readlane_b32 s0, v23, 24
+; CHECK-NEXT: v_readlane_b32 s1, v23, 25
+; CHECK-NEXT: v_readlane_b32 s2, v23, 26
+; CHECK-NEXT: v_readlane_b32 s3, v23, 27
+; CHECK-NEXT: v_readlane_b32 s4, v23, 28
+; CHECK-NEXT: v_readlane_b32 s5, v23, 29
+; CHECK-NEXT: v_readlane_b32 s6, v23, 30
+; CHECK-NEXT: v_readlane_b32 s7, v23, 31
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:7]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: v_readlane_b32 s0, v0, 32
-; CHECK-NEXT: v_readlane_b32 s1, v0, 33
-; CHECK-NEXT: v_readlane_b32 s2, v0, 34
-; CHECK-NEXT: v_readlane_b32 s3, v0, 35
-; CHECK-NEXT: v_readlane_b32 s4, v0, 36
-; CHECK-NEXT: v_readlane_b32 s5, v0, 37
-; CHECK-NEXT: v_readlane_b32 s6, v0, 38
-; CHECK-NEXT: v_readlane_b32 s7, v0, 39
-; CHECK-NEXT: v_readlane_b32 s8, v0, 40
-; CHECK-NEXT: v_readlane_b32 s9, v0, 41
-; CHECK-NEXT: v_readlane_b32 s10, v0, 42
-; CHECK-NEXT: v_readlane_b32 s11, v0, 43
-; CHECK-NEXT: v_readlane_b32 s12, v0, 44
-; CHECK-NEXT: v_readlane_b32 s13, v0, 45
-; CHECK-NEXT: v_readlane_b32 s14, v0, 46
-; CHECK-NEXT: v_readlane_b32 s15, v0, 47
+; CHECK-NEXT: v_readlane_b32 s0, v23, 32
+; CHECK-NEXT: v_readlane_b32 s1, v23, 33
+; CHECK-NEXT: v_readlane_b32 s2, v23, 34
+; CHECK-NEXT: v_readlane_b32 s3, v23, 35
+; CHECK-NEXT: v_readlane_b32 s4, v23, 36
+; CHECK-NEXT: v_readlane_b32 s5, v23, 37
+; CHECK-NEXT: v_readlane_b32 s6, v23, 38
+; CHECK-NEXT: v_readlane_b32 s7, v23, 39
+; CHECK-NEXT: v_readlane_b32 s8, v23, 40
+; CHECK-NEXT: v_readlane_b32 s9, v23, 41
+; CHECK-NEXT: v_readlane_b32 s10, v23, 42
+; CHECK-NEXT: v_readlane_b32 s11, v23, 43
+; CHECK-NEXT: v_readlane_b32 s12, v23, 44
+; CHECK-NEXT: v_readlane_b32 s13, v23, 45
+; CHECK-NEXT: v_readlane_b32 s14, v23, 46
+; CHECK-NEXT: v_readlane_b32 s15, v23, 47
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:15]
; CHECK-NEXT: ;;#ASMEND
-; CHECK-NEXT: ; kill: killed $vgpr23
-; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
call void asm sideeffect "", "~{v[16:19]}"() #0
call void asm sideeffect "", "~{v[20:21]}"() #0
- call void asm sideeffect "", "~{v22}"() #0
%val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
%val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 17a19116735e4e..14a02d4d2dcec0 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -1,19 +1,19 @@
; REQUIRES: asserts
; RUN: llc -verify-machineinstrs=0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
-; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=greedy -vgpr-regalloc=greedy -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
+; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=greedy -wwm-regalloc=greedy -vgpr-regalloc=greedy -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
; RUN: llc -verify-machineinstrs=0 -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=O0 %s
-; RUN: llc -verify-machineinstrs=0 -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT-BASIC %s
+; RUN: llc -verify-machineinstrs=0 -wwm-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT-BASIC %s
; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-DEFAULT %s
-; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-BASIC %s
+; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -wwm-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-BASIC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
-; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc
+; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
@@ -23,6 +23,11 @@
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
; DEFAULT-NEXT: Greedy Register Allocator
; DEFAULT-NEXT: SI Lower WWM Copies
+; DEFAULT-NEXT: Virtual Register Rewriter
+; DEFAULT-NEXT: AMDGPU Reserve WWM Registers
+; DEFAULT-NEXT: Virtual Register Map
+; DEFAULT-NEXT: Live Register Matrix
+; DEFAULT-NEXT: Greedy Register Allocator
; DEFAULT-NEXT: GCN NSA Reassign
; DEFAULT-NEXT: Virtual Register Rewriter
; DEFAULT-NEXT: AMDGPU Mark Last Scratch Load
@@ -37,6 +42,8 @@
; O0-NEXT: SI Pre-allocate WWM Registers
; O0-NEXT: Fast Register Allocator
; O0-NEXT: SI Lower WWM Copies
+; O0-NEXT: AMDGPU Reserve WWM Registers
+; O0-NEXT: Fast Register Allocator
; O0-NEXT: SI Fix VGPR copies
@@ -60,6 +67,11 @@
; BASIC-DEFAULT-NEXT: Machine Optimization Remark Emitter
; BASIC-DEFAULT-NEXT: Greedy Register Allocator
; BASIC-DEFAULT-NEXT: SI Lower WWM Copies
+; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
+; BASIC-DEFAULT-NEXT: AMDGPU Reserve WWM Registers
+; BASIC-DEFAULT-NEXT: Virtual Register Map
+; BASIC-DEFAULT-NEXT: Live Register Matrix
+; BASIC-DEFAULT-NEXT: Greedy Register Allocator
; BASIC-DEFAULT-NEXT: GCN NSA Reassign
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
; BASIC-DEFAULT-NEXT: AMDGPU Mark Last Scratch Load
@@ -75,6 +87,11 @@
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
; DEFAULT-BASIC-NEXT: Basic Register Allocator
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
+; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
+; DEFAULT-BASIC-NEXT: AMDGPU Reserve WWM Registers
+; DEFAULT-BASIC-NEXT: Virtual Register Map
+; DEFAULT-BASIC-NEXT: Live Register Matrix
+; DEFAULT-BASIC-NEXT: Basic Register Allocator
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
; DEFAULT-BASIC-NEXT: AMDGPU Mark Last Scratch Load
@@ -96,6 +113,11 @@
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: SI Lower WWM Copies
+; BASIC-BASIC-NEXT: Virtual Register Rewriter
+; BASIC-BASIC-NEXT: AMDGPU Reserve WWM Registers
+; BASIC-BASIC-NEXT: Virtual Register Map
+; BASIC-BASIC-NEXT: Live Register Matrix
+; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: GCN NSA Reassign
; BASIC-BASIC-NEXT: Virtual Register Rewriter
; BASIC-BASIC-NEXT: AMDGPU Mark Last Scratch Load
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
index 189aead1e5646a..520717391b5968 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck -check-prefix=PEI %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -passes=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s
@@ -45,28 +46,25 @@ body: |
; SGPR_SPILL: bb.0:
; SGPR_SPILL-NEXT: successors: %bb.1(0x80000000)
; SGPR_SPILL-NEXT: {{ $}}
- ; SGPR_SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILL-NEXT: renamable $sgpr10 = IMPLICIT_DEF
- ; SGPR_SPILL-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[V_WRITELANE_B32_]]
+ ; SGPR_SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR_SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
; SGPR_SPILL-NEXT: DBG_VALUE $noreg, 0
; SGPR_SPILL-NEXT: {{ $}}
; SGPR_SPILL-NEXT: bb.1:
- ; SGPR_SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[V_WRITELANE_B32_]], 0
- ; SGPR_SPILL-NEXT: KILL [[V_WRITELANE_B32_]]
+ ; SGPR_SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; SGPR_SPILL-NEXT: S_ENDPGM 0
+ ;
; PEI-LABEL: name: test
; PEI: bb.0:
; PEI-NEXT: successors: %bb.1(0x80000000)
; PEI-NEXT: {{ $}}
- ; PEI-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; PEI-NEXT: renamable $sgpr10 = IMPLICIT_DEF
- ; PEI-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0
+ ; PEI-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; PEI-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0
; PEI-NEXT: {{ $}}
; PEI-NEXT: bb.1:
- ; PEI-NEXT: liveins: $vgpr0
- ; PEI-NEXT: {{ $}}
- ; PEI-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
- ; PEI-NEXT: KILL killed renamable $vgpr0
+ ; PEI-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
; PEI-NEXT: S_ENDPGM 0
bb.0:
renamable $sgpr10 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
index 29622d3fd0f1b5..5692dc1e2a2c65 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll
@@ -9,7 +9,6 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 {
call void asm sideeffect "", "~{v[8:15]}" () #0
call void asm sideeffect "", "~{v[16:19]}"() #0
call void asm sideeffect "", "~{v[20:21]}"() #0
- call void asm sideeffect "", "~{v22}"() #0
%val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
%val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0
%val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
index d430ba758572d6..59036c64c8afcc 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
@@ -9,19 +9,9 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN: ; %bb.0:
; GCN-NEXT: s_add_u32 s0, s0, s13
; GCN-NEXT: s_addc_u32 s1, s1, 0
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: s_load_dword s4, s[6:7], 0x2
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[24:25]
-; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[24:25]
-; GCN-NEXT: ;;#ASMSTART
-; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ;;#ASMSTART
@@ -31,91 +21,91 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_writelane_b32 v1, s8, 0
-; GCN-NEXT: v_writelane_b32 v1, s9, 1
-; GCN-NEXT: v_writelane_b32 v1, s10, 2
-; GCN-NEXT: v_writelane_b32 v1, s11, 3
-; GCN-NEXT: v_writelane_b32 v1, s12, 4
-; GCN-NEXT: v_writelane_b32 v1, s13, 5
-; GCN-NEXT: v_writelane_b32 v1, s14, 6
-; GCN-NEXT: v_writelane_b32 v1, s15, 7
-; GCN-NEXT: v_writelane_b32 v1, s16, 8
-; GCN-NEXT: v_writelane_b32 v1, s17, 9
-; GCN-NEXT: v_writelane_b32 v1, s18, 10
-; GCN-NEXT: v_writelane_b32 v1, s19, 11
-; GCN-NEXT: v_writelane_b32 v1, s20, 12
-; GCN-NEXT: v_writelane_b32 v1, s21, 13
-; GCN-NEXT: v_writelane_b32 v1, s22, 14
-; GCN-NEXT: v_writelane_b32 v1, s23, 15
+; GCN-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v22, s8, 0
+; GCN-NEXT: v_writelane_b32 v22, s9, 1
+; GCN-NEXT: v_writelane_b32 v22, s10, 2
+; GCN-NEXT: v_writelane_b32 v22, s11, 3
+; GCN-NEXT: v_writelane_b32 v22, s12, 4
+; GCN-NEXT: v_writelane_b32 v22, s13, 5
+; GCN-NEXT: v_writelane_b32 v22, s14, 6
+; GCN-NEXT: v_writelane_b32 v22, s15, 7
+; GCN-NEXT: v_writelane_b32 v22, s16, 8
+; GCN-NEXT: v_writelane_b32 v22, s17, 9
+; GCN-NEXT: v_writelane_b32 v22, s18, 10
+; GCN-NEXT: v_writelane_b32 v22, s19, 11
+; GCN-NEXT: v_writelane_b32 v22, s20, 12
+; GCN-NEXT: v_writelane_b32 v22, s21, 13
+; GCN-NEXT: v_writelane_b32 v22, s22, 14
+; GCN-NEXT: v_writelane_b32 v22, s23, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s8, 16
-; GCN-NEXT: v_writelane_b32 v1, s9, 17
-; GCN-NEXT: v_writelane_b32 v1, s10, 18
-; GCN-NEXT: v_writelane_b32 v1, s11, 19
-; GCN-NEXT: v_writelane_b32 v1, s12, 20
-; GCN-NEXT: v_writelane_b32 v1, s13, 21
-; GCN-NEXT: v_writelane_b32 v1, s14, 22
-; GCN-NEXT: v_writelane_b32 v1, s15, 23
-; GCN-NEXT: v_writelane_b32 v1, s16, 24
-; GCN-NEXT: v_writelane_b32 v1, s17, 25
-; GCN-NEXT: v_writelane_b32 v1, s18, 26
-; GCN-NEXT: v_writelane_b32 v1, s19, 27
-; GCN-NEXT: v_writelane_b32 v1, s20, 28
-; GCN-NEXT: v_writelane_b32 v1, s21, 29
-; GCN-NEXT: v_writelane_b32 v1, s22, 30
-; GCN-NEXT: v_writelane_b32 v1, s23, 31
+; GCN-NEXT: v_writelane_b32 v22, s8, 16
+; GCN-NEXT: v_writelane_b32 v22, s9, 17
+; GCN-NEXT: v_writelane_b32 v22, s10, 18
+; GCN-NEXT: v_writelane_b32 v22, s11, 19
+; GCN-NEXT: v_writelane_b32 v22, s12, 20
+; GCN-NEXT: v_writelane_b32 v22, s13, 21
+; GCN-NEXT: v_writelane_b32 v22, s14, 22
+; GCN-NEXT: v_writelane_b32 v22, s15, 23
+; GCN-NEXT: v_writelane_b32 v22, s16, 24
+; GCN-NEXT: v_writelane_b32 v22, s17, 25
+; GCN-NEXT: v_writelane_b32 v22, s18, 26
+; GCN-NEXT: v_writelane_b32 v22, s19, 27
+; GCN-NEXT: v_writelane_b32 v22, s20, 28
+; GCN-NEXT: v_writelane_b32 v22, s21, 29
+; GCN-NEXT: v_writelane_b32 v22, s22, 30
+; GCN-NEXT: v_writelane_b32 v22, s23, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s8, 32
-; GCN-NEXT: v_writelane_b32 v1, s9, 33
-; GCN-NEXT: v_writelane_b32 v1, s10, 34
-; GCN-NEXT: v_writelane_b32 v1, s11, 35
-; GCN-NEXT: v_writelane_b32 v1, s12, 36
-; GCN-NEXT: v_writelane_b32 v1, s13, 37
-; GCN-NEXT: v_writelane_b32 v1, s14, 38
-; GCN-NEXT: v_writelane_b32 v1, s15, 39
-; GCN-NEXT: v_writelane_b32 v1, s16, 40
-; GCN-NEXT: v_writelane_b32 v1, s17, 41
-; GCN-NEXT: v_writelane_b32 v1, s18, 42
-; GCN-NEXT: v_writelane_b32 v1, s19, 43
-; GCN-NEXT: v_writelane_b32 v1, s20, 44
-; GCN-NEXT: v_writelane_b32 v1, s21, 45
-; GCN-NEXT: v_writelane_b32 v1, s22, 46
-; GCN-NEXT: v_writelane_b32 v1, s23, 47
+; GCN-NEXT: v_writelane_b32 v22, s8, 32
+; GCN-NEXT: v_writelane_b32 v22, s9, 33
+; GCN-NEXT: v_writelane_b32 v22, s10, 34
+; GCN-NEXT: v_writelane_b32 v22, s11, 35
+; GCN-NEXT: v_writelane_b32 v22, s12, 36
+; GCN-NEXT: v_writelane_b32 v22, s13, 37
+; GCN-NEXT: v_writelane_b32 v22, s14, 38
+; GCN-NEXT: v_writelane_b32 v22, s15, 39
+; GCN-NEXT: v_writelane_b32 v22, s16, 40
+; GCN-NEXT: v_writelane_b32 v22, s17, 41
+; GCN-NEXT: v_writelane_b32 v22, s18, 42
+; GCN-NEXT: v_writelane_b32 v22, s19, 43
+; GCN-NEXT: v_writelane_b32 v22, s20, 44
+; GCN-NEXT: v_writelane_b32 v22, s21, 45
+; GCN-NEXT: v_writelane_b32 v22, s22, 46
+; GCN-NEXT: v_writelane_b32 v22, s23, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v1, s8, 48
-; GCN-NEXT: v_writelane_b32 v1, s9, 49
-; GCN-NEXT: v_writelane_b32 v1, s10, 50
-; GCN-NEXT: v_writelane_b32 v1, s11, 51
-; GCN-NEXT: v_writelane_b32 v1, s12, 52
-; GCN-NEXT: v_writelane_b32 v1, s13, 53
-; GCN-NEXT: v_writelane_b32 v1, s14, 54
-; GCN-NEXT: v_writelane_b32 v1, s15, 55
-; GCN-NEXT: v_writelane_b32 v1, s16, 56
-; GCN-NEXT: v_writelane_b32 v1, s17, 57
-; GCN-NEXT: v_writelane_b32 v1, s18, 58
-; GCN-NEXT: v_writelane_b32 v1, s19, 59
-; GCN-NEXT: v_writelane_b32 v1, s20, 60
-; GCN-NEXT: v_writelane_b32 v1, s21, 61
-; GCN-NEXT: v_writelane_b32 v1, s22, 62
-; GCN-NEXT: v_writelane_b32 v1, s23, 63
+; GCN-NEXT: v_writelane_b32 v22, s8, 48
+; GCN-NEXT: v_writelane_b32 v22, s9, 49
+; GCN-NEXT: v_writelane_b32 v22, s10, 50
+; GCN-NEXT: v_writelane_b32 v22, s11, 51
+; GCN-NEXT: v_writelane_b32 v22, s12, 52
+; GCN-NEXT: v_writelane_b32 v22, s13, 53
+; GCN-NEXT: v_writelane_b32 v22, s14, 54
+; GCN-NEXT: v_writelane_b32 v22, s15, 55
+; GCN-NEXT: v_writelane_b32 v22, s16, 56
+; GCN-NEXT: v_writelane_b32 v22, s17, 57
+; GCN-NEXT: v_writelane_b32 v22, s18, 58
+; GCN-NEXT: v_writelane_b32 v22, s19, 59
+; GCN-NEXT: v_writelane_b32 v22, s20, 60
+; GCN-NEXT: v_writelane_b32 v22, s21, 61
+; GCN-NEXT: v_writelane_b32 v22, s22, 62
+; GCN-NEXT: v_writelane_b32 v22, s23, 63
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v22, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[6:7]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(1)
-; GCN-NEXT: v_writelane_b32 v0, s6, 0
-; GCN-NEXT: v_writelane_b32 v0, s7, 1
+; GCN-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v22, s6, 0
+; GCN-NEXT: v_writelane_b32 v22, s7, 1
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v22, off, s[0:3], 0 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: s_mov_b32 s5, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -123,88 +113,88 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s4, v1, 0
-; GCN-NEXT: v_readlane_b32 s5, v1, 1
-; GCN-NEXT: v_readlane_b32 s6, v1, 2
-; GCN-NEXT: v_readlane_b32 s7, v1, 3
-; GCN-NEXT: v_readlane_b32 s8, v1, 4
-; GCN-NEXT: v_readlane_b32 s9, v1, 5
-; GCN-NEXT: v_readlane_b32 s10, v1, 6
-; GCN-NEXT: v_readlane_b32 s11, v1, 7
-; GCN-NEXT: v_readlane_b32 s12, v1, 8
-; GCN-NEXT: v_readlane_b32 s13, v1, 9
-; GCN-NEXT: v_readlane_b32 s14, v1, 10
-; GCN-NEXT: v_readlane_b32 s15, v1, 11
-; GCN-NEXT: v_readlane_b32 s16, v1, 12
-; GCN-NEXT: v_readlane_b32 s17, v1, 13
-; GCN-NEXT: v_readlane_b32 s18, v1, 14
-; GCN-NEXT: v_readlane_b32 s19, v1, 15
+; GCN-NEXT: v_readlane_b32 s4, v23, 0
+; GCN-NEXT: v_readlane_b32 s5, v23, 1
+; GCN-NEXT: v_readlane_b32 s6, v23, 2
+; GCN-NEXT: v_readlane_b32 s7, v23, 3
+; GCN-NEXT: v_readlane_b32 s8, v23, 4
+; GCN-NEXT: v_readlane_b32 s9, v23, 5
+; GCN-NEXT: v_readlane_b32 s10, v23, 6
+; GCN-NEXT: v_readlane_b32 s11, v23, 7
+; GCN-NEXT: v_readlane_b32 s12, v23, 8
+; GCN-NEXT: v_readlane_b32 s13, v23, 9
+; GCN-NEXT: v_readlane_b32 s14, v23, 10
+; GCN-NEXT: v_readlane_b32 s15, v23, 11
+; GCN-NEXT: v_readlane_b32 s16, v23, 12
+; GCN-NEXT: v_readlane_b32 s17, v23, 13
+; GCN-NEXT: v_readlane_b32 s18, v23, 14
+; GCN-NEXT: v_readlane_b32 s19, v23, 15
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s4, v1, 16
-; GCN-NEXT: v_readlane_b32 s5, v1, 17
-; GCN-NEXT: v_readlane_b32 s6, v1, 18
-; GCN-NEXT: v_readlane_b32 s7, v1, 19
-; GCN-NEXT: v_readlane_b32 s8, v1, 20
-; GCN-NEXT: v_readlane_b32 s9, v1, 21
-; GCN-NEXT: v_readlane_b32 s10, v1, 22
-; GCN-NEXT: v_readlane_b32 s11, v1, 23
-; GCN-NEXT: v_readlane_b32 s12, v1, 24
-; GCN-NEXT: v_readlane_b32 s13, v1, 25
-; GCN-NEXT: v_readlane_b32 s14, v1, 26
-; GCN-NEXT: v_readlane_b32 s15, v1, 27
-; GCN-NEXT: v_readlane_b32 s16, v1, 28
-; GCN-NEXT: v_readlane_b32 s17, v1, 29
-; GCN-NEXT: v_readlane_b32 s18, v1, 30
-; GCN-NEXT: v_readlane_b32 s19, v1, 31
+; GCN-NEXT: v_readlane_b32 s4, v23, 16
+; GCN-NEXT: v_readlane_b32 s5, v23, 17
+; GCN-NEXT: v_readlane_b32 s6, v23, 18
+; GCN-NEXT: v_readlane_b32 s7, v23, 19
+; GCN-NEXT: v_readlane_b32 s8, v23, 20
+; GCN-NEXT: v_readlane_b32 s9, v23, 21
+; GCN-NEXT: v_readlane_b32 s10, v23, 22
+; GCN-NEXT: v_readlane_b32 s11, v23, 23
+; GCN-NEXT: v_readlane_b32 s12, v23, 24
+; GCN-NEXT: v_readlane_b32 s13, v23, 25
+; GCN-NEXT: v_readlane_b32 s14, v23, 26
+; GCN-NEXT: v_readlane_b32 s15, v23, 27
+; GCN-NEXT: v_readlane_b32 s16, v23, 28
+; GCN-NEXT: v_readlane_b32 s17, v23, 29
+; GCN-NEXT: v_readlane_b32 s18, v23, 30
+; GCN-NEXT: v_readlane_b32 s19, v23, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s4, v1, 32
-; GCN-NEXT: v_readlane_b32 s5, v1, 33
-; GCN-NEXT: v_readlane_b32 s6, v1, 34
-; GCN-NEXT: v_readlane_b32 s7, v1, 35
-; GCN-NEXT: v_readlane_b32 s8, v1, 36
-; GCN-NEXT: v_readlane_b32 s9, v1, 37
-; GCN-NEXT: v_readlane_b32 s10, v1, 38
-; GCN-NEXT: v_readlane_b32 s11, v1, 39
-; GCN-NEXT: v_readlane_b32 s12, v1, 40
-; GCN-NEXT: v_readlane_b32 s13, v1, 41
-; GCN-NEXT: v_readlane_b32 s14, v1, 42
-; GCN-NEXT: v_readlane_b32 s15, v1, 43
-; GCN-NEXT: v_readlane_b32 s16, v1, 44
-; GCN-NEXT: v_readlane_b32 s17, v1, 45
-; GCN-NEXT: v_readlane_b32 s18, v1, 46
-; GCN-NEXT: v_readlane_b32 s19, v1, 47
+; GCN-NEXT: v_readlane_b32 s4, v23, 32
+; GCN-NEXT: v_readlane_b32 s5, v23, 33
+; GCN-NEXT: v_readlane_b32 s6, v23, 34
+; GCN-NEXT: v_readlane_b32 s7, v23, 35
+; GCN-NEXT: v_readlane_b32 s8, v23, 36
+; GCN-NEXT: v_readlane_b32 s9, v23, 37
+; GCN-NEXT: v_readlane_b32 s10, v23, 38
+; GCN-NEXT: v_readlane_b32 s11, v23, 39
+; GCN-NEXT: v_readlane_b32 s12, v23, 40
+; GCN-NEXT: v_readlane_b32 s13, v23, 41
+; GCN-NEXT: v_readlane_b32 s14, v23, 42
+; GCN-NEXT: v_readlane_b32 s15, v23, 43
+; GCN-NEXT: v_readlane_b32 s16, v23, 44
+; GCN-NEXT: v_readlane_b32 s17, v23, 45
+; GCN-NEXT: v_readlane_b32 s18, v23, 46
+; GCN-NEXT: v_readlane_b32 s19, v23, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_readlane_b32 s8, v1, 48
-; GCN-NEXT: v_readlane_b32 s9, v1, 49
-; GCN-NEXT: v_readlane_b32 s10, v1, 50
-; GCN-NEXT: v_readlane_b32 s11, v1, 51
-; GCN-NEXT: v_readlane_b32 s12, v1, 52
-; GCN-NEXT: v_readlane_b32 s13, v1, 53
-; GCN-NEXT: v_readlane_b32 s14, v1, 54
-; GCN-NEXT: v_readlane_b32 s15, v1, 55
-; GCN-NEXT: v_readlane_b32 s16, v1, 56
-; GCN-NEXT: v_readlane_b32 s17, v1, 57
-; GCN-NEXT: v_readlane_b32 s18, v1, 58
-; GCN-NEXT: v_readlane_b32 s19, v1, 59
-; GCN-NEXT: v_readlane_b32 s20, v1, 60
-; GCN-NEXT: v_readlane_b32 s21, v1, 61
-; GCN-NEXT: v_readlane_b32 s22, v1, 62
-; GCN-NEXT: v_readlane_b32 s23, v1, 63
+; GCN-NEXT: v_readlane_b32 s8, v23, 48
+; GCN-NEXT: v_readlane_b32 s9, v23, 49
+; GCN-NEXT: v_readlane_b32 s10, v23, 50
+; GCN-NEXT: v_readlane_b32 s11, v23, 51
+; GCN-NEXT: v_readlane_b32 s12, v23, 52
+; GCN-NEXT: v_readlane_b32 s13, v23, 53
+; GCN-NEXT: v_readlane_b32 s14, v23, 54
+; GCN-NEXT: v_readlane_b32 s15, v23, 55
+; GCN-NEXT: v_readlane_b32 s16, v23, 56
+; GCN-NEXT: v_readlane_b32 s17, v23, 57
+; GCN-NEXT: v_readlane_b32 s18, v23, 58
+; GCN-NEXT: v_readlane_b32 s19, v23, 59
+; GCN-NEXT: v_readlane_b32 s20, v23, 60
+; GCN-NEXT: v_readlane_b32 s21, v23, 61
+; GCN-NEXT: v_readlane_b32 s22, v23, 62
+; GCN-NEXT: v_readlane_b32 s23, v23, 63
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s4, v0, 0
-; GCN-NEXT: v_readlane_b32 s5, v0, 1
+; GCN-NEXT: v_readlane_b32 s4, v22, 0
+; GCN-NEXT: v_readlane_b32 s5, v22, 1
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[8:23]
; GCN-NEXT: ;;#ASMEND
@@ -212,20 +202,11 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN-NEXT: ; use s[4:5]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB0_2: ; %ret
-; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[24:25]
-; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[24:25]
-; GCN-NEXT: ; kill: killed $vgpr1
-; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: s_endpgm
call void asm sideeffect "", "~{v[0:7]}" () #0
call void asm sideeffect "", "~{v[8:15]}" () #0
call void asm sideeffect "", "~{v[16:19]}"() #0
call void asm sideeffect "", "~{v[20:21]}"() #0
- call void asm sideeffect "", "~{v22}"() #0
%wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index b0fb24e60bead0..bb0a707a7c90b7 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -28,181 +28,180 @@ body: |
; GCN-LABEL: name: test_main
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
- ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: $vcc_hi = frame-setup COPY $sgpr33
; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32
; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
- ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
+ ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
- ; GCN-NEXT: renamable $vgpr2 = IMPLICIT_DEF
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr3
- ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr3
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr4
- ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr4
- ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr5
- ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr5
- ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr5
- ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr5
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr2
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr2
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr3
+ ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr3
+ ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr4
+ ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr4
+ ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr4
+ ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr4
; GCN-NEXT: $sgpr22 = IMPLICIT_DEF
- ; GCN-NEXT: renamable $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr2
+ ; GCN-NEXT: $vgpr5 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5
; GCN-NEXT: dead $vgpr1 = V_SET_INACTIVE_B32 0, $vgpr0, 0, 0, $sgpr_null, implicit $exec, implicit-def $scc
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GCN-NEXT: liveins: $vcc_hi
; GCN-NEXT: {{ $}}
; GCN-NEXT: KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.2:
; GCN-NEXT: successors: %bb.3(0x80000000)
- ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GCN-NEXT: liveins: $vcc_hi
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR killed $vgpr5, 0
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.3:
- ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GCN-NEXT: liveins: $vcc_hi
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 3
- ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 2
- ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 1
- ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0
- ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 31
- ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 30
- ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 29
- ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 28
- ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 27
- ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 26
- ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 25
- ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 24
- ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 23
- ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 22
- ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 21
- ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 20
- ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 19
- ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 18
- ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 17
- ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 16
- ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 15
- ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 14
- ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 13
- ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 12
- ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 11
- ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 10
- ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 9
- ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 8
- ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 7
- ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 6
- ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 5
- ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 4
- ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3
- ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2
- ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1
- ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0
- ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31
- ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30
- ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29
- ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28
- ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27
- ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26
- ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25
- ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24
- ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23
- ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22
- ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21
- ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20
- ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19
- ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18
- ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17
- ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16
- ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15
- ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14
- ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13
- ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12
- ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11
- ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10
- ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9
- ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8
- ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7
- ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6
- ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5
- ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
- ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
- ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
- ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
- ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
- ; GCN-NEXT: KILL killed renamable $vgpr2
+ ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3
+ ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2
+ ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1
+ ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0
+ ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31
+ ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30
+ ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29
+ ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28
+ ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27
+ ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26
+ ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25
+ ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24
+ ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23
+ ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22
+ ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21
+ ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20
+ ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19
+ ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18
+ ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17
+ ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16
+ ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15
+ ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14
+ ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13
+ ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12
+ ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11
+ ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10
+ ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9
+ ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8
+ ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7
+ ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6
+ ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5
+ ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4
+ ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3
+ ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2
+ ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1
+ ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0
+ ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31
+ ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30
+ ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29
+ ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28
+ ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27
+ ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26
+ ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25
+ ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24
+ ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23
+ ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22
+ ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21
+ ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 20
+ ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19
+ ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18
+ ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17
+ ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16
+ ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15
+ ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14
+ ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13
+ ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12
+ ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11
+ ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10
+ ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9
+ ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8
+ ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6
+ ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5
+ ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4
+ ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3
+ ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2
+ ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
+ ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
- ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
- ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
- ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
- ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
+ ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
+ ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
+ ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
+ ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
+ ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
; GCN-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc
; GCN-NEXT: $sgpr33 = frame-destroy COPY $vcc_hi
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 2c4b7a22facf43..59c4b715dd12e5 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -218,7 +218,7 @@ body: |
; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -235,7 +235,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -271,7 +271,7 @@ body: |
; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -288,7 +288,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -327,7 +327,7 @@ body: |
; VMEM-GFX8-LABEL: name: sgpr64_save_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -345,7 +345,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -381,7 +381,7 @@ body: |
; VMEM-GFX8-LABEL: name: sgpr64_restore_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -399,7 +399,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -441,7 +441,7 @@ body: |
; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot_x2
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -468,7 +468,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -505,7 +505,7 @@ body: |
; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot_x2
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
@@ -532,7 +532,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
index 0c6c0069911f0a..bed7c0c12b7cbb 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir
@@ -18,11 +18,9 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
- ; CHECK-NEXT: KILL [[DEF]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index b2f5b6aa7fe36a..ff2202f1e177b8 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -19,7 +19,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
-; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: s_add_i32 s32, s32, 0x7400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill
@@ -135,13 +135,13 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v255, s30, 0
; GCN-NEXT: v_writelane_b32 v255, s31, 1
-; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:444
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
@@ -266,7 +266,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
; GCN-NEXT: s_mov_b32 s33, s18
@@ -313,7 +313,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: s_mov_b32 s18, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
-; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: s_add_i32 s32, s32, 0x7400
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill
@@ -428,13 +428,13 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v254, s30, 0
; GCN-NEXT: v_writelane_b32 v254, s31, 1
-; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:440
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12
@@ -558,7 +558,7 @@ define void @spill_to_lowest_available_vgpr() #0 {
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00
; GCN-NEXT: s_mov_b32 s33, s18
@@ -602,8 +602,8 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
; GCN-LABEL: spill_sgpr_with_sgpr_uses:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
@@ -715,38 +715,30 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[8:9]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s4
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_writelane_b32 v0, s4, 0
+; GCN-NEXT: ; implicit-def: $vgpr254 : SGPR spill to VGPR lane
+; GCN-NEXT: v_writelane_b32 v254, s4, 0
; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[8:9]
; GCN-NEXT: s_cbranch_scc1 .LBB3_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[8:9]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s4, v0, 0
+; GCN-NEXT: v_readlane_b32 s4, v254, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s4
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: .LBB3_2: ; %ret
-; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, s[8:9]
-; GCN-NEXT: ; kill: killed $vgpr0
; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
@@ -857,8 +849,8 @@ define void @spill_sgpr_with_sgpr_uses() #0 {
; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
-; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -1183,7 +1175,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
@@ -1315,16 +1307,16 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: flat_load_dwordx4 v[6:9], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -1446,7 +1438,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
index 887e9c4b5dc5ed..0e6d9ce4a7f31b 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir
@@ -25,9 +25,9 @@ body: |
; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills
; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
; SGPR_SPILLED-NEXT: {{ $}}
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]], implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 2, [[DEF]], implicit $sgpr0_sgpr1
@@ -91,9 +91,9 @@ body: |
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first
; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
; SGPR_SPILLED-NEXT: {{ $}}
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3
@@ -155,9 +155,9 @@ body: |
; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first
; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62
; SGPR_SPILLED-NEXT: {{ $}}
- ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62
; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62
+ ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]]
; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]]
; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
deleted file mode 100644
index c5a5a5209f54fc..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
-
-; Make sure this doesn't crash.
-; ALL-LABEL: {{^}}test:
-; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0
-; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000
-
-; Make sure we are handling hazards correctly.
-; SGPR: v_mov_b32_e32 v0, vcc_lo
-; SGPR-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1
-; SGPR-NEXT: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 ; 4-byte Folded Reload
-; SGPR-NEXT: s_mov_b64 exec, [[EXEC_COPY]]
-; SGPR-NEXT: s_waitcnt vmcnt(0)
-; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0
-; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1
-; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2
-; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3
-; SGPR-NEXT: s_or_saveexec_b64 s[100:101], -1
-; SGPR-NEXT: s_mov_b64 exec, s[100:101]
-; SGPR-NEXT: s_nop 2
-; SGPR-NEXT: buffer_store_dword v0, off, s[{{[0-9]+}}:[[HI]]], 0
-; SGPR-NEXT: ; kill: killed $vgpr1
-
-; ALL: s_endpgm
-define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
- call void asm sideeffect "", "~{s[0:7]}" ()
- call void asm sideeffect "", "~{s[8:15]}" ()
- call void asm sideeffect "", "~{s[16:23]}" ()
- call void asm sideeffect "", "~{s[24:31]}" ()
- call void asm sideeffect "", "~{s[32:39]}" ()
- call void asm sideeffect "", "~{s[40:47]}" ()
- call void asm sideeffect "", "~{s[48:55]}" ()
- call void asm sideeffect "", "~{s[56:63]}" ()
- call void asm sideeffect "", "~{s[64:71]}" ()
- call void asm sideeffect "", "~{s[72:79]}" ()
- call void asm sideeffect "", "~{s[80:87]}" ()
- call void asm sideeffect "", "~{s[88:95]}" ()
- call void asm sideeffect "", "~{v[0:7]}" ()
- call void asm sideeffect "", "~{v[8:15]}" ()
- call void asm sideeffect "", "~{v[16:23]}" ()
- call void asm sideeffect "", "~{v[24:31]}" ()
- call void asm sideeffect "", "~{v[32:39]}" ()
- call void asm sideeffect "", "~{v[40:47]}" ()
- call void asm sideeffect "", "~{v[48:55]}" ()
- call void asm sideeffect "", "~{v[56:63]}" ()
- call void asm sideeffect "", "~{v[64:71]}" ()
- call void asm sideeffect "", "~{v[72:79]}" ()
- call void asm sideeffect "", "~{v[80:87]}" ()
- call void asm sideeffect "", "~{v[88:95]}" ()
- call void asm sideeffect "", "~{v[96:103]}" ()
- call void asm sideeffect "", "~{v[104:111]}" ()
- call void asm sideeffect "", "~{v[112:119]}" ()
- call void asm sideeffect "", "~{v[120:127]}" ()
- call void asm sideeffect "", "~{v[128:135]}" ()
- call void asm sideeffect "", "~{v[136:143]}" ()
- call void asm sideeffect "", "~{v[144:151]}" ()
- call void asm sideeffect "", "~{v[152:159]}" ()
- call void asm sideeffect "", "~{v[160:167]}" ()
- call void asm sideeffect "", "~{v[168:175]}" ()
- call void asm sideeffect "", "~{v[176:183]}" ()
- call void asm sideeffect "", "~{v[184:191]}" ()
- call void asm sideeffect "", "~{v[192:199]}" ()
- call void asm sideeffect "", "~{v[200:207]}" ()
- call void asm sideeffect "", "~{v[208:215]}" ()
- call void asm sideeffect "", "~{v[216:223]}" ()
- call void asm sideeffect "", "~{v[224:231]}" ()
- call void asm sideeffect "", "~{v[232:239]}" ()
- call void asm sideeffect "", "~{v[240:247]}" ()
- call void asm sideeffect "", "~{v[248:255]}" ()
-
- store i32 %in, ptr addrspace(1) %out
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
index f8ec6bb5d943f7..080bd052a7391e 100644
--- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
+++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir
@@ -35,7 +35,7 @@ body: |
; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF
- ; CHECK-NEXT: dead renamable $vgpr0 = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF
; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9
; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF
@@ -79,9 +79,9 @@ body: |
; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1)
- ; CHECK-NEXT: dead renamable $vgpr0 = COPY killed renamable $sgpr49
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+ ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr49
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37
; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35
diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
index 34bcc3f02ac66d..03988c3994992c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
@@ -2,23 +2,20 @@
; GCN-LABEL: {{^}}spill_csr_s5_copy:
; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
-; GCN: s_xor_saveexec_b64
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GCN-NEXT: s_mov_b64 exec, -1
+; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec
-; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4
+; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2
; GCN: s_swappc_b64
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9
; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4
-; GCN: s_xor_saveexec_b64
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GCN-NEXT: s_mov_b64 exec, -1
+; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2
+; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN: s_mov_b64 exec
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index e54e5898f8b538..40be0c6b67ee98 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -22,14 +22,11 @@ body: |
; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: dead [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 0, [[V_MOV_B32_e32_]].sub1, implicit $mode, implicit $exec
; CHECK-NEXT: undef [[V_MAC_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef [[V_MAC_F32_e32_]].sub1, implicit $mode, implicit $exec
- ; CHECK-NEXT: SI_SPILL_V64_SAVE [[V_MAC_F32_e32_]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec
- ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub1
- ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub1
- ; CHECK-NEXT: S_NOP 0, implicit undef %9.sub0:vreg_64
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_MAC_F32_e32_]].sub1
+ ; CHECK-NEXT: S_NOP 0, implicit undef [[V_MAC_F32_e32_]].sub0
bb.0:
successors: %bb.1
@@ -59,13 +56,13 @@ body: |
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2
- ; CHECK-NEXT: S_NOP 0, implicit undef %4.sub0:vreg_128
- ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub2
+ ; CHECK-NEXT: S_NOP 0, implicit undef [[V_MOV_B32_e32_]].sub0
+ ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2
bb.0:
successors: %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index 05e1082de44783..f4edafd9443ab9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -25,14 +25,13 @@ body: |
; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
- ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
+ ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1
- ; GCN-NEXT: KILL killed renamable $vgpr0
; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
@@ -64,13 +63,12 @@ body: |
; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
- ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
- ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
- ; GCN-NEXT: KILL killed renamable $vgpr0
+ ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0
+ ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
+ ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index 11babc82e919b4..dff2bd7f7aef9e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -21,8 +21,8 @@ body: |
; GCN-LABEL: name: sgpr32_spill
; GCN: liveins: $sgpr30_sgpr31, $sgpr10
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31
@@ -55,7 +55,6 @@ body: |
; GCN-LABEL: name: sgpr_spill_lane_crossover
; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr63
@@ -89,6 +88,7 @@ body: |
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr63
; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr64, 1, [[DEF]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr65, 2, [[DEF]]
@@ -187,9 +187,9 @@ body: |
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: S_NOP 0
; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
@@ -256,7 +256,6 @@ body: |
; GCN-NEXT: successors: %bb.3(0x80000000)
; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: S_NOP 0
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
@@ -264,7 +263,7 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: liveins: $sgpr10, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
@@ -272,7 +271,7 @@ body: |
; GCN-NEXT: successors: %bb.3(0x80000000)
; GCN-NEXT: liveins: $sgpr10, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
@@ -281,6 +280,7 @@ body: |
; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr10 = S_MOV_B32 10
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
index 2caaabde38e9d9..9b0f52cb39b018 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir
@@ -19,12 +19,8 @@ body: |
bb.0:
liveins: $sgpr30_sgpr31, $vgpr0
; GCN-LABEL: name: shift_back_exec_copy_reserved_reg
- ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+ ; GCN: liveins: $sgpr30_sgpr31, $vgpr0
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0
; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
@@ -60,12 +56,10 @@ body: |
bb.0:
liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0
; GCN-LABEL: name: spill_exec_copy_reserved_reg
- ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+ ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr2, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr28_sgpr29 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr28_sgpr29
; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2
; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr35, 1, undef $vgpr2
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index b4a981f1db4ec7..882356d994fc68 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -12,12 +12,10 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def undef %30.sub0
- ; GCN-NEXT: SI_SPILL_V64_SAVE %30, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
+ ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def undef %12.sub0
; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
; GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %22:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1)
- ; GCN-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
- ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, [[SI_SPILL_V64_RESTORE]]
+ ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, %12
; GCN-NEXT: S_ENDPGM 0
%v0 = call i32 asm sideeffect "; def $0", "=v"()
%tmp = insertelement <2 x i32> undef, i32 %v0, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index 81dd2c4457b2fb..4384d1e32cf53f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -11,9 +11,8 @@ define void @test() {
; CHECK: ; %bb.0: ; %bb.0
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
-; CHECK-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; CHECK-NEXT: .LBB0_1: ; %bb.1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
@@ -21,42 +20,40 @@ define void @test() {
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: .LBB0_3: ; %bb.3
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
-; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: ; implicit-def: $sgpr4
-; CHECK-NEXT: v_mov_b32_e32 v1, s4
-; CHECK-NEXT: v_readfirstlane_b32 s6, v1
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: v_readfirstlane_b32 s6, v0
; CHECK-NEXT: s_mov_b64 s[4:5], -1
; CHECK-NEXT: s_mov_b32 s7, 0
; CHECK-NEXT: s_cmp_eq_u32 s6, s7
-; CHECK-NEXT: v_writelane_b32 v0, s4, 0
-; CHECK-NEXT: v_writelane_b32 v0, s5, 1
+; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; CHECK-NEXT: v_writelane_b32 v1, s4, 0
+; CHECK-NEXT: v_writelane_b32 v1, s5, 1
; CHECK-NEXT: s_mov_b64 s[10:11], exec
; CHECK-NEXT: s_mov_b64 exec, -1
-; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse
; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: s_cbranch_scc1 .LBB0_5
; CHECK-NEXT: ; %bb.4: ; %bb.4
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
-; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: s_mov_b64 s[4:5], 0
-; CHECK-NEXT: v_writelane_b32 v0, s4, 0
-; CHECK-NEXT: v_writelane_b32 v0, s5, 1
+; CHECK-NEXT: v_writelane_b32 v1, s4, 0
+; CHECK-NEXT: v_writelane_b32 v1, s5, 1
; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
; CHECK-NEXT: s_nop 0
-; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse
; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: .LBB0_5: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
; CHECK-NEXT: s_nop 0
-; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
; CHECK-NEXT: s_mov_b64 exec, s[10:11]
-; CHECK-NEXT: v_readlane_b32 s4, v0, 0
-; CHECK-NEXT: v_readlane_b32 s5, v0, 1
+; CHECK-NEXT: v_readlane_b32 s4, v1, 0
+; CHECK-NEXT: v_readlane_b32 s5, v1, 1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; CHECK-NEXT: s_mov_b32 s4, 1
; CHECK-NEXT: ; implicit-def: $sgpr5
@@ -64,12 +61,8 @@ define void @test() {
; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5]
; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
; CHECK-NEXT: ; %bb.6: ; %bb.5
-; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
-; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse
-; CHECK-NEXT: s_mov_b64 exec, s[10:11]
-; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir
index 5040140a3e9351..be2a31d7ccbaab 100644
--- a/llvm/test/CodeGen/AMDGPU/spill192.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill192.mir
@@ -37,8 +37,8 @@ body: |
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: {{ $}}
- ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
+ ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir
index 5e53f93df95f74..f4965dcf61e176 100644
--- a/llvm/test/CodeGen/AMDGPU/spill224.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill224.mir
@@ -33,8 +33,8 @@ body: |
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: {{ $}}
- ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
+ ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill288.mir b/llvm/test/CodeGen/AMDGPU/spill288.mir
index 3d5404a9c1ad5b..312531ba5bc996 100644
--- a/llvm/test/CodeGen/AMDGPU/spill288.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill288.mir
@@ -33,8 +33,8 @@ body: |
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: {{ $}}
- ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+ ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill320.mir b/llvm/test/CodeGen/AMDGPU/spill320.mir
index 4473a4d6648efe..0c0c01760f8ba0 100644
--- a/llvm/test/CodeGen/AMDGPU/spill320.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill320.mir
@@ -33,8 +33,8 @@ body: |
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: {{ $}}
- ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
+ ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill352.mir b/llvm/test/CodeGen/AMDGPU/spill352.mir
index 8fa053a908b60e..8823ba1a8326ec 100644
--- a/llvm/test/CodeGen/AMDGPU/spill352.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill352.mir
@@ -33,8 +33,8 @@ body: |
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: {{ $}}
- ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
+ ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/spill384.mir b/llvm/test/CodeGen/AMDGPU/spill384.mir
index cd604e4483b9c7..e33641cf892379 100644
--- a/llvm/test/CodeGen/AMDGPU/spill384.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill384.mir
@@ -33,8 +33,8 @@ body: |
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: {{ $}}
- ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]]
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]]
diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
index 3c16cd29de8f6a..6b0fbc44c65b7f 100644
--- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
+++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
@@ -236,20 +236,15 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE32-O0: ; %bb.0: ; %bb0
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1
-; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; WAVE32-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
-; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; WAVE32-O0-NEXT: v_mov_b32_e32 v1, v0
-; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1
-; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7
-; WAVE32-O0-NEXT: v_and_b32_e64 v1, 1, v1
-; WAVE32-O0-NEXT: v_cmp_eq_u32_e64 s5, v1, 1
+; WAVE32-O0-NEXT: v_and_b32_e64 v0, 1, v0
+; WAVE32-O0-NEXT: v_cmp_eq_u32_e64 s5, v0, 1
; WAVE32-O0-NEXT: s_mov_b32 s4, exec_lo
-; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT: v_writelane_b32 v0, s4, 0
+; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; WAVE32-O0-NEXT: v_writelane_b32 v1, s4, 0
; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1
-; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; WAVE32-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7
; WAVE32-O0-NEXT: s_and_b32 s4, s4, s5
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
@@ -262,14 +257,13 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: .LBB4_2: ; %bb2
; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1
-; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; WAVE32-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT: v_readlane_b32 s4, v0, 0
+; WAVE32-O0-NEXT: v_readlane_b32 s4, v1, 0
; WAVE32-O0-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; WAVE32-O0-NEXT: ; kill: killed $vgpr0
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1
-; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; WAVE32-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
@@ -278,21 +272,16 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE64-O0: ; %bb.0: ; %bb0
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; WAVE64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
-; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; WAVE64-O0-NEXT: v_mov_b32_e32 v1, v0
-; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11]
-; WAVE64-O0-NEXT: v_and_b32_e64 v1, 1, v1
-; WAVE64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, 1
+; WAVE64-O0-NEXT: v_and_b32_e64 v0, 1, v0
+; WAVE64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, 1
; WAVE64-O0-NEXT: s_mov_b64 s[4:5], exec
-; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT: v_writelane_b32 v0, s4, 0
-; WAVE64-O0-NEXT: v_writelane_b32 v0, s5, 1
+; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
+; WAVE64-O0-NEXT: v_writelane_b32 v1, s4, 0
+; WAVE64-O0-NEXT: v_writelane_b32 v1, s5, 1
; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; WAVE64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11]
; WAVE64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
@@ -305,15 +294,14 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: .LBB4_2: ; %bb2
; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; WAVE64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11]
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT: v_readlane_b32 s4, v0, 0
-; WAVE64-O0-NEXT: v_readlane_b32 s5, v0, 1
+; WAVE64-O0-NEXT: v_readlane_b32 s4, v1, 0
+; WAVE64-O0-NEXT: v_readlane_b32 s5, v1, 1
; WAVE64-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; WAVE64-O0-NEXT: ; kill: killed $vgpr0
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; WAVE64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE64-O0-NEXT: s_setpc_b64 s[30:31]
@@ -324,10 +312,10 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
-; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_and_b32_e64 v0, 1, v0
; WAVE32-WWM-PREALLOC-NEXT: v_cmp_eq_u32_e64 s5, v0, 1
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, exec_lo
+; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v1, s4, 0
; WAVE32-WWM-PREALLOC-NEXT: s_and_b32 s4, s4, s5
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
@@ -341,7 +329,6 @@ define void @func_stacksave_nonentry_block(i1 %cond) {
; WAVE32-WWM-PREALLOC-NEXT: .LBB4_2: ; %bb2
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v1, 0
; WAVE32-WWM-PREALLOC-NEXT: s_or_b32 exec_lo, exec_lo, s4
-; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr1
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
@@ -941,7 +928,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-O0-NEXT: s_bitset0_b32 s23, 21
; WAVE32-O0-NEXT: s_add_u32 s20, s20, s9
; WAVE32-O0-NEXT: s_addc_u32 s21, s21, 0
-; WAVE32-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; WAVE32-O0-NEXT: s_mov_b32 s14, s8
; WAVE32-O0-NEXT: s_mov_b32 s13, s7
; WAVE32-O0-NEXT: s_mov_b32 s12, s6
@@ -949,12 +935,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-O0-NEXT: s_mov_b64 s[8:9], s[2:3]
; WAVE32-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE32-O0-NEXT: s_mov_b32 s0, s32
-; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 0
+; WAVE32-O0-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; WAVE32-O0-NEXT: v_writelane_b32 v32, s0, 0
; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5
-; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 1
-; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1
-; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill
-; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19
+; WAVE32-O0-NEXT: v_writelane_b32 v32, s0, 1
; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 42
; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0
; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0
@@ -1037,17 +1021,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1
-; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:128 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19
-; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT: v_readlane_b32 s1, v0, 1
-; WAVE32-O0-NEXT: v_readlane_b32 s0, v0, 0
+; WAVE32-O0-NEXT: v_readlane_b32 s1, v32, 1
+; WAVE32-O0-NEXT: v_readlane_b32 s0, v32, 0
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s1
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_mov_b32 s32, s0
-; WAVE32-O0-NEXT: ; kill: killed $vgpr0
; WAVE32-O0-NEXT: s_endpgm
;
; WAVE64-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
@@ -1059,7 +1038,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE64-O0-NEXT: s_waitcnt lgkmcnt(0)
; WAVE64-O0-NEXT: s_add_u32 s24, s24, s9
; WAVE64-O0-NEXT: s_addc_u32 s25, s25, 0
-; WAVE64-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
; WAVE64-O0-NEXT: s_mov_b32 s14, s8
; WAVE64-O0-NEXT: s_mov_b32 s13, s7
; WAVE64-O0-NEXT: s_mov_b32 s12, s6
@@ -1067,12 +1045,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE64-O0-NEXT: s_mov_b64 s[8:9], s[2:3]
; WAVE64-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE64-O0-NEXT: s_mov_b32 s0, s32
-; WAVE64-O0-NEXT: v_writelane_b32 v3, s0, 0
+; WAVE64-O0-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
+; WAVE64-O0-NEXT: v_writelane_b32 v32, s0, 0
; WAVE64-O0-NEXT: s_lshr_b32 s0, s0, 6
-; WAVE64-O0-NEXT: v_writelane_b32 v3, s0, 1
-; WAVE64-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill
-; WAVE64-O0-NEXT: s_mov_b64 exec, s[20:21]
+; WAVE64-O0-NEXT: v_writelane_b32 v32, s0, 1
; WAVE64-O0-NEXT: v_mov_b32_e32 v3, 42
; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0
; WAVE64-O0-NEXT: s_waitcnt_vscnt null, 0x0
@@ -1155,17 +1131,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; WAVE64-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT: s_mov_b64 exec, s[20:21]
-; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT: v_readlane_b32 s1, v0, 1
-; WAVE64-O0-NEXT: v_readlane_b32 s0, v0, 0
+; WAVE64-O0-NEXT: v_readlane_b32 s1, v32, 1
+; WAVE64-O0-NEXT: v_readlane_b32 s0, v32, 0
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s1
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_mov_b32 s32, s0
-; WAVE64-O0-NEXT: ; kill: killed $vgpr0
; WAVE64-O0-NEXT: s_endpgm
;
; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects:
@@ -1178,7 +1149,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s23, 21
; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s20, s20, s9
; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s21, s21, 0
-; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s14, s8
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s13, s7
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s12, s6
@@ -1186,6 +1156,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[8:9], s[2:3]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[4:5], s[0:1]
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s0, s32
+; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 0
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 1
@@ -1277,7 +1248,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects
; WAVE32-WWM-PREALLOC-NEXT: ; use s1
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s0
-; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr32
; WAVE32-WWM-PREALLOC-NEXT: s_endpgm
%alloca = alloca [32 x i32], addrspace(5)
%stacksave = call ptr addrspace(5) @llvm.stacksave.p5()
@@ -1362,23 +1332,20 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects:
; WAVE32-O0: ; %bb.0:
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; WAVE32-O0-NEXT: s_mov_b32 s25, s33
+; WAVE32-O0-NEXT: s_mov_b32 s24, s33
; WAVE32-O0-NEXT: s_mov_b32 s33, s32
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; WAVE32-O0-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s16
; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0x1200
-; WAVE32-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; WAVE32-O0-NEXT: v_writelane_b32 v32, s30, 0
; WAVE32-O0-NEXT: v_writelane_b32 v32, s31, 1
; WAVE32-O0-NEXT: s_mov_b32 s16, s32
-; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 0
+; WAVE32-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
+; WAVE32-O0-NEXT: v_writelane_b32 v33, s16, 0
; WAVE32-O0-NEXT: s_lshr_b32 s16, s16, 5
-; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 1
-; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1
-; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24
+; WAVE32-O0-NEXT: v_writelane_b32 v33, s16, 1
; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 42
; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0
@@ -1456,25 +1423,20 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-O0-NEXT: ; implicit-def: $sgpr18
; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1
-; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24
-; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE32-O0-NEXT: v_readlane_b32 s5, v0, 1
-; WAVE32-O0-NEXT: v_readlane_b32 s4, v0, 0
+; WAVE32-O0-NEXT: v_readlane_b32 s5, v33, 1
+; WAVE32-O0-NEXT: v_readlane_b32 s4, v33, 0
; WAVE32-O0-NEXT: ;;#ASMSTART
; WAVE32-O0-NEXT: ; use s5
; WAVE32-O0-NEXT: ;;#ASMEND
; WAVE32-O0-NEXT: s_mov_b32 s32, s4
; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1
; WAVE32-O0-NEXT: v_readlane_b32 s30, v32, 0
-; WAVE32-O0-NEXT: ; kill: killed $vgpr0
; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; WAVE32-O0-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0xffffee00
-; WAVE32-O0-NEXT: s_mov_b32 s33, s25
+; WAVE32-O0-NEXT: s_mov_b32 s33, s24
; WAVE32-O0-NEXT: s_waitcnt vmcnt(0)
; WAVE32-O0-NEXT: s_setpc_b64 s[30:31]
;
@@ -1485,19 +1447,16 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE64-O0-NEXT: s_mov_b32 s33, s32
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[16:17], -1
; WAVE64-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill
+; WAVE64-O0-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE64-O0-NEXT: s_mov_b64 exec, s[16:17]
; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0x2400
-; WAVE64-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; WAVE64-O0-NEXT: v_writelane_b32 v32, s30, 0
; WAVE64-O0-NEXT: v_writelane_b32 v32, s31, 1
; WAVE64-O0-NEXT: s_mov_b32 s16, s32
-; WAVE64-O0-NEXT: v_writelane_b32 v0, s16, 0
+; WAVE64-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
+; WAVE64-O0-NEXT: v_writelane_b32 v33, s16, 0
; WAVE64-O0-NEXT: s_lshr_b32 s16, s16, 6
-; WAVE64-O0-NEXT: v_writelane_b32 v0, s16, 1
-; WAVE64-O0-NEXT: s_or_saveexec_b64 s[26:27], -1
-; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
-; WAVE64-O0-NEXT: s_mov_b64 exec, s[26:27]
+; WAVE64-O0-NEXT: v_writelane_b32 v33, s16, 1
; WAVE64-O0-NEXT: v_mov_b32_e32 v0, 42
; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE64-O0-NEXT: s_waitcnt_vscnt null, 0x0
@@ -1575,22 +1534,17 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE64-O0-NEXT: ; implicit-def: $sgpr18
; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18
; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; WAVE64-O0-NEXT: s_or_saveexec_b64 s[26:27], -1
-; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT: s_mov_b64 exec, s[26:27]
-; WAVE64-O0-NEXT: s_waitcnt vmcnt(0)
-; WAVE64-O0-NEXT: v_readlane_b32 s5, v0, 1
-; WAVE64-O0-NEXT: v_readlane_b32 s4, v0, 0
+; WAVE64-O0-NEXT: v_readlane_b32 s5, v33, 1
+; WAVE64-O0-NEXT: v_readlane_b32 s4, v33, 0
; WAVE64-O0-NEXT: ;;#ASMSTART
; WAVE64-O0-NEXT: ; use s5
; WAVE64-O0-NEXT: ;;#ASMEND
; WAVE64-O0-NEXT: s_mov_b32 s32, s4
; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1
; WAVE64-O0-NEXT: v_readlane_b32 s30, v32, 0
-; WAVE64-O0-NEXT: ; kill: killed $vgpr0
; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1
; WAVE64-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload
+; WAVE64-O0-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5]
; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0xffffdc00
; WAVE64-O0-NEXT: s_mov_b32 s33, s19
@@ -1603,14 +1557,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s33
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s32
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s16, -1
-; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
-; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
+; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s16
; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0x1200
-; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s30, 0
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s31, 1
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, s32
+; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 0
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s16, s16, 5
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 1
@@ -1699,10 +1653,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v33, 0
-; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr32
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
-; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
-; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
+; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0xffffee00
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s24
diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
index 2c4a5dba3520cd..cc261b0da4a8f4 100644
--- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
+++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
@@ -17,10 +17,10 @@ body: |
bb.0:
liveins: $sgpr20, $vgpr1
; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg
- ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1
+ ; GCN: liveins: $sgpr20, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
@@ -50,11 +50,11 @@ body: |
bb.0:
liveins: $sgpr20, $sgpr21, $vgpr1
; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
- ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2
+ ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
@@ -89,10 +89,10 @@ body: |
bb.0:
liveins: $sgpr20, $vgpr1
 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg
- ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2
+ ; GCN: liveins: $sgpr20, $vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
@@ -121,17 +121,17 @@ body: |
bb.0:
liveins: $sgpr20, $vgpr1
; GCN-LABEL: name: wwm_csr_spill_reload
- ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40
+ ; GCN: liveins: $sgpr20, $vgpr1
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+ ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
- ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
- ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
+ ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
+ ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
+ ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0, implicit $exec
; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
- ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GCN-NEXT: SI_RETURN implicit $vgpr0
$vgpr40 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 52370f6a2ef054..4dfd4c095c87a0 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -206,14 +206,14 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a
;
; HSA-TRAP-GFX1100-O0-LABEL: non_entry_trap:
; HSA-TRAP-GFX1100-O0: ; %bb.0: ; %entry
-; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b64 s[2:3], s[0:1]
-; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v0, s2, 0
-; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v0, s3, 1
+; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
+; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s2, 0
+; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s3, 1
; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off ; 4-byte Folded Spill
+; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
; HSA-TRAP-GFX1100-O0-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
@@ -236,16 +236,15 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a
; HSA-TRAP-GFX1100-O0-NEXT: s_branch .LBB1_3
; HSA-TRAP-GFX1100-O0-NEXT: .LBB1_2: ; %ret
; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v0, 0
-; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v0, 1
-; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 0
-; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v2, 3
-; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v1, v2, s[0:1] dlc
+; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v2, 0
+; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v2, 1
+; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
-; HSA-TRAP-GFX1100-O0-NEXT: ; kill: killed $vgpr0
; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
; HSA-TRAP-GFX1100-O0-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1
; HSA-TRAP-GFX1100-O0-NEXT: s_sethalt 5
@@ -352,34 +351,32 @@ define amdgpu_kernel void @trap_with_use_after(ptr addrspace(1) %arg0, ptr addrs
;
; HSA-TRAP-GFX1100-O0-LABEL: trap_with_use_after:
; HSA-TRAP-GFX1100-O0: ; %bb.0:
-; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0
; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:8 ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[2:3], s[4:5], 0x8
+; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s2, 0
-; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s3, 1
+; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s2, 0
+; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s3, 1
; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v1, off offset:4 ; 4-byte Folded Spill
+; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
; HSA-TRAP-GFX1100-O0-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off ; 4-byte Folded Spill
+; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:4 ; 4-byte Folded Spill
; HSA-TRAP-GFX1100-O0-NEXT: s_cbranch_execnz .LBB2_2
; HSA-TRAP-GFX1100-O0-NEXT: ; %bb.1:
+; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:4 ; 4-byte Folded Reload
; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1
-; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload
-; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
-; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v0, 0
-; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v0, 1
-; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:8 ; 4-byte Folded Reload
; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v1, v2, s[0:1] dlc
+; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v2, 0
+; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v2, 1
+; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc
; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0
-; HSA-TRAP-GFX1100-O0-NEXT: ; kill: killed $vgpr0
; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm
; HSA-TRAP-GFX1100-O0-NEXT: .LBB2_2:
; HSA-TRAP-GFX1100-O0-NEXT: s_trap 2
diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
index c73b023f18652e..4c2d0d2fa0d77b 100644
--- a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
+++ b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
--- |
define amdgpu_ps void @e32() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index 15a83475f368e9..a827ebe96cfcf4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -13,41 +13,37 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_add_u32 s0, s0, s13
; CHECK-NEXT: s_addc_u32 s1, s1, 0
-; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
-; CHECK-NEXT: v_mov_b32_e32 v2, v0
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[8:9]
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: global_load_ushort v3, v1, s[4:5] offset:4
+; CHECK-NEXT: v_mov_b32_e32 v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: global_load_ushort v2, v0, s[4:5] offset:4
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: s_mov_b32 s4, 0
-; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4
-; CHECK-NEXT: v_mov_b32_e32 v2, 0
-; CHECK-NEXT: ds_write_b8 v1, v2
+; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: ds_write_b8 v0, v1
; CHECK-NEXT: s_mov_b64 s[4:5], exec
-; CHECK-NEXT: v_writelane_b32 v0, s4, 0
-; CHECK-NEXT: v_writelane_b32 v0, s5, 1
+; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; CHECK-NEXT: v_writelane_b32 v3, s4, 0
+; CHECK-NEXT: v_writelane_b32 v3, s5, 1
; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %bb193
; CHECK-NEXT: .LBB0_2: ; %bb194
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], 0 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], 0 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_readlane_b32 s4, v1, 0
-; CHECK-NEXT: v_readlane_b32 s5, v1, 1
+; CHECK-NEXT: v_readlane_b32 s4, v3, 0
+; CHECK-NEXT: v_readlane_b32 s5, v3, 1
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b32 s4, 0xffff
-; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_and_b32_e64 v0, s4, v0
; CHECK-NEXT: s_mov_b32 s4, 0
; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s4
@@ -66,10 +62,6 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() {
; CHECK-NEXT: s_trap 2
; CHECK-NEXT: ; divergent unreachable
; CHECK-NEXT: .LBB0_4: ; %UnifiedReturnBlock
-; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload
-; CHECK-NEXT: s_mov_b64 exec, s[8:9]
-; CHECK-NEXT: ; kill: killed $vgpr0
; CHECK-NEXT: s_endpgm
bb:
%i10 = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index dd3572c027c86d..e5caa509835c3a 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -413,7 +413,7 @@ body: |
; MUBUF-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -424,7 +424,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -434,7 +434,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -446,7 +446,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -456,7 +456,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -467,7 +467,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -477,7 +477,7 @@ body: |
; VMEM-GFX8-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -488,7 +488,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -525,7 +525,7 @@ body: |
; MUBUF-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -537,7 +537,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -547,7 +547,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -559,7 +559,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -569,7 +569,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -580,7 +580,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -590,7 +590,7 @@ body: |
; VMEM-GFX8-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -602,7 +602,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -639,7 +639,7 @@ body: |
; MUBUF-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -652,7 +652,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -662,7 +662,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -674,7 +674,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -684,7 +684,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -695,7 +695,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -705,7 +705,7 @@ body: |
; VMEM-GFX8-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -718,7 +718,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -755,7 +755,7 @@ body: |
; MUBUF-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -766,7 +766,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -776,7 +776,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -788,7 +788,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -798,7 +798,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -809,7 +809,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -819,7 +819,7 @@ body: |
; VMEM-GFX8-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -830,7 +830,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -867,7 +867,7 @@ body: |
; MUBUF-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -879,7 +879,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -889,7 +889,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -901,7 +901,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -911,7 +911,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -922,7 +922,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -932,7 +932,7 @@ body: |
; VMEM-GFX8-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -944,7 +944,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -981,7 +981,7 @@ body: |
; MUBUF-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -994,7 +994,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -1004,7 +1004,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1016,7 +1016,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -1026,7 +1026,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1037,7 +1037,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -1047,7 +1047,7 @@ body: |
; VMEM-GFX8-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1060,7 +1060,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
@@ -1200,7 +1200,7 @@ body: |
; MUBUF-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
; MUBUF: bb.0:
; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1211,7 +1211,7 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: bb.1:
; MUBUF-NEXT: successors: %bb.2(0x80000000)
- ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: S_NOP 0
; MUBUF-NEXT: {{ $}}
@@ -1221,7 +1221,7 @@ body: |
; GFX9-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
; GFX9-FLATSCR: bb.0:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1235,7 +1235,7 @@ body: |
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: bb.1:
; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: S_NOP 0
; GFX9-FLATSCR-NEXT: {{ $}}
@@ -1245,7 +1245,7 @@ body: |
; GFX10-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
; GFX10-FLATSCR: bb.0:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
@@ -1259,7 +1259,7 @@ body: |
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: bb.1:
; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000)
- ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; GFX10-FLATSCR-NEXT: {{ $}}
; GFX10-FLATSCR-NEXT: S_NOP 0
; GFX10-FLATSCR-NEXT: {{ $}}
@@ -1269,7 +1269,7 @@ body: |
; VMEM-GFX8-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot
; VMEM-GFX8: bb.0:
; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
@@ -1280,7 +1280,7 @@ body: |
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: bb.1:
; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000)
- ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+ ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
; VMEM-GFX8-NEXT: {{ $}}
; VMEM-GFX8-NEXT: S_NOP 0
; VMEM-GFX8-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
index 6659e953237692..fa0922590712a4 100644
--- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir
@@ -30,7 +30,7 @@ machineFunctionInfo:
body: |
bb.0:
; CHECK-LABEL: name: undef_identity_copy
- ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
+ ; CHECK: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc
; CHECK-NEXT: $sgpr4 = COPY $sgpr95
@@ -39,13 +39,14 @@ body: |
; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
; CHECK-NEXT: $sgpr4 = COPY $sgpr95
- ; CHECK-NEXT: $vgpr0 = COPY renamable $vgpr40
- ; CHECK-NEXT: $vgpr1 = COPY renamable $vgpr41
- ; CHECK-NEXT: $vgpr2 = COPY killed renamable $vgpr42
- ; CHECK-NEXT: $vgpr3 = KILL undef renamable $vgpr3
+ ; CHECK-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORDX4_]].sub0
+ ; CHECK-NEXT: $vgpr1 = COPY [[FLAT_LOAD_DWORDX4_]].sub1
+ ; CHECK-NEXT: $vgpr2 = COPY [[FLAT_LOAD_DWORDX4_]].sub2
+ ; CHECK-NEXT: $vgpr3 = COPY undef %4:vgpr_32
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
- ; CHECK-NEXT: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
+ ; CHECK-NEXT: FLAT_STORE_DWORD undef %6:vreg_64, [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1)
%2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
index 8c285f37b4878a..d1ee82e74b3de5 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll
@@ -13,24 +13,24 @@ define void @vector_reg_liverange_split() #0 {
; GFX90A-NEXT: s_mov_b32 s16, s33
; GFX90A-NEXT: s_mov_b32 s33, s32
; GFX90A-NEXT: s_xor_saveexec_b64 s[18:19], -1
-; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX90A-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX90A-NEXT: s_mov_b64 exec, -1
-; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX90A-NEXT: buffer_store_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX90A-NEXT: buffer_store_dword a32, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX90A-NEXT: s_mov_b64 exec, s[18:19]
; GFX90A-NEXT: v_writelane_b32 v40, s16, 4
; GFX90A-NEXT: v_writelane_b32 v40, s28, 2
; GFX90A-NEXT: v_writelane_b32 v40, s29, 3
-; GFX90A-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GFX90A-NEXT: v_writelane_b32 v40, s30, 0
+; GFX90A-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
; GFX90A-NEXT: s_addk_i32 s32, 0x400
; GFX90A-NEXT: v_writelane_b32 v40, s31, 1
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s20
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_writelane_b32 v0, s20, 0
+; GFX90A-NEXT: v_writelane_b32 v39, s20, 0
; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0
+; GFX90A-NEXT: v_accvgpr_write_b32 a32, v39
; GFX90A-NEXT: s_mov_b64 exec, s[28:29]
; GFX90A-NEXT: s_getpc_b64 s[16:17]
; GFX90A-NEXT: s_add_u32 s16, s16, foo@gotpcrel32@lo+4
@@ -39,23 +39,22 @@ define void @vector_reg_liverange_split() #0 {
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32
+; GFX90A-NEXT: v_accvgpr_read_b32 v39, a32
; GFX90A-NEXT: s_mov_b64 exec, s[28:29]
-; GFX90A-NEXT: v_readlane_b32 s20, v0, 0
+; GFX90A-NEXT: v_readlane_b32 s20, v39, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s20
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_readlane_b32 s31, v40, 1
; GFX90A-NEXT: v_readlane_b32 s30, v40, 0
-; GFX90A-NEXT: ; kill: killed $vgpr0
; GFX90A-NEXT: v_readlane_b32 s4, v40, 4
; GFX90A-NEXT: v_readlane_b32 s28, v40, 2
; GFX90A-NEXT: v_readlane_b32 s29, v40, 3
; GFX90A-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX90A-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX90A-NEXT: s_mov_b64 exec, -1
-; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX90A-NEXT: buffer_load_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX90A-NEXT: buffer_load_dword a32, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX90A-NEXT: s_mov_b64 exec, s[6:7]
; GFX90A-NEXT: s_addk_i32 s32, 0xfc00
; GFX90A-NEXT: s_mov_b32 s33, s4
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
index 5608ea85635488..4837efe6606b82 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll
@@ -19,24 +19,23 @@ define void @test() #0 {
; GCN-NEXT: s_mov_b32 s16, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_xor_saveexec_b64 s[18:19], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, -1
-; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[18:19]
; GCN-NEXT: v_writelane_b32 v40, s16, 4
; GCN-NEXT: v_writelane_b32 v40, s28, 2
; GCN-NEXT: v_writelane_b32 v40, s29, 3
-; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-NEXT: v_writelane_b32 v40, s30, 0
-; GCN-NEXT: s_addk_i32 s32, 0x800
+; GCN-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
+; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s16
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: v_writelane_b32 v0, s16, 0
+; GCN-NEXT: v_writelane_b32 v39, s16, 0
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4
@@ -45,26 +44,24 @@ define void @test() #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[28:29]
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_readlane_b32 s4, v1, 0
+; GCN-NEXT: v_readlane_b32 s4, v39, 0
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
-; GCN-NEXT: ; kill: killed $vgpr1
; GCN-NEXT: v_readlane_b32 s4, v40, 4
; GCN-NEXT: v_readlane_b32 s28, v40, 2
; GCN-NEXT: v_readlane_b32 s29, v40, 3
; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, -1
-; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
-; GCN-NEXT: s_addk_i32 s32, 0xf800
+; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: s_mov_b32 s33, s4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -75,23 +72,23 @@ define void @test() #0 {
; GCN-O0-NEXT: s_mov_b32 s16, s33
; GCN-O0-NEXT: s_mov_b32 s33, s32
; GCN-O0-NEXT: s_xor_saveexec_b64 s[18:19], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, -1
-; GCN-O0-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[18:19]
; GCN-O0-NEXT: v_writelane_b32 v40, s16, 4
; GCN-O0-NEXT: v_writelane_b32 v40, s28, 2
; GCN-O0-NEXT: v_writelane_b32 v40, s29, 3
; GCN-O0-NEXT: s_add_i32 s32, s32, 0x400
-; GCN-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
; GCN-O0-NEXT: v_writelane_b32 v40, s30, 0
; GCN-O0-NEXT: v_writelane_b32 v40, s31, 1
; GCN-O0-NEXT: ;;#ASMSTART
; GCN-O0-NEXT: ; def s16
; GCN-O0-NEXT: ;;#ASMEND
-; GCN-O0-NEXT: v_writelane_b32 v0, s16, 0
+; GCN-O0-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane
+; GCN-O0-NEXT: v_writelane_b32 v39, s16, 0
; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN-O0-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-O0-NEXT: s_mov_b64 exec, s[28:29]
; GCN-O0-NEXT: s_getpc_b64 s[16:17]
; GCN-O0-NEXT: s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4
@@ -104,26 +101,25 @@ define void @test() #0 {
; GCN-O0-NEXT: s_waitcnt lgkmcnt(0)
; GCN-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[28:29]
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
-; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
+; GCN-O0-NEXT: v_readlane_b32 s4, v39, 0
; GCN-O0-NEXT: ; implicit-def: $sgpr6_sgpr7
-; GCN-O0-NEXT: v_mov_b32_e32 v1, s6
-; GCN-O0-NEXT: v_mov_b32_e32 v2, s7
-; GCN-O0-NEXT: v_mov_b32_e32 v3, s4
-; GCN-O0-NEXT: global_store_dword v[1:2], v3, off
+; GCN-O0-NEXT: v_mov_b32_e32 v0, s6
+; GCN-O0-NEXT: v_mov_b32_e32 v1, s7
+; GCN-O0-NEXT: v_mov_b32_e32 v2, s4
+; GCN-O0-NEXT: global_store_dword v[0:1], v2, off
; GCN-O0-NEXT: s_waitcnt vmcnt(0)
; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1
; GCN-O0-NEXT: v_readlane_b32 s30, v40, 0
-; GCN-O0-NEXT: ; kill: killed $vgpr0
; GCN-O0-NEXT: v_readlane_b32 s4, v40, 4
; GCN-O0-NEXT: v_readlane_b32 s28, v40, 2
; GCN-O0-NEXT: v_readlane_b32 s29, v40, 3
; GCN-O0-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, -1
-; GCN-O0-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN-O0-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
; GCN-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00
; GCN-O0-NEXT: s_mov_b32 s33, s4
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
new file mode 100644
index 00000000000000..145f1e483cd997
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
@@ -0,0 +1,29 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -filetype=null %s 2>&1 | FileCheck %s
+
+; A negative test to capture the expected error when the VGPRs are insufficient for wwm-regalloc.
+
+; CHECK: error: can't find enough VGPRs for wwm-regalloc
+
+define amdgpu_kernel void @test(i32 %in) {
+entry:
+ call void asm sideeffect "", "~{v[0:7]}" ()
+ call void asm sideeffect "", "~{v[8:15]}" ()
+ call void asm sideeffect "", "~{v[16:23]}" ()
+ call void asm sideeffect "", "~{v[24:31]}" ()
+ call void asm sideeffect "", "~{v[32:39]}" ()
+ call void asm sideeffect "", "~{v[40:47]}" ()
+ call void asm sideeffect "", "~{v[48:55]}" ()
+ call void asm sideeffect "", "~{v[56:63]}" ()
+ %val0 = call i32 asm sideeffect "; def $0", "=s" ()
+ %val1 = call i32 asm sideeffect "; def $0", "=s" ()
+ %val2 = call i32 asm sideeffect "; def $0", "=s" ()
+ %cmp = icmp eq i32 %in, 0
+ br i1 %cmp, label %bb0, label %ret
+bb0:
+ call void asm sideeffect "; use $0", "s"(i32 %val0)
+ call void asm sideeffect "; use $0", "s"(i32 %val1)
+ call void asm sideeffect "; use $0", "s"(i32 %val2)
+ br label %ret
+ret:
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index c295a056eb9e74..025381d5c16df8 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -134,15 +134,10 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0: ; %bb.0: ; %entry
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
; GFX9-O0-NEXT: s_mov_b32 s40, s6
; GFX9-O0-NEXT: s_mov_b32 s34, s4
; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41
@@ -157,38 +152,38 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: s_mov_b32 s37, s44
; GFX9-O0-NEXT: s_mov_b32 s38, s43
; GFX9-O0-NEXT: s_mov_b32 s39, s42
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s40, 0
-; GFX9-O0-NEXT: v_writelane_b32 v0, s41, 1
-; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 2
-; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 3
+; GFX9-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0
+; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1
+; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 2
+; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 3
; GFX9-O0-NEXT: s_mov_b32 s34, 0
; GFX9-O0-NEXT: s_nop 2
-; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], s34
+; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
; GFX9-O0-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[36:37]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[36:37]
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34
; GFX9-O0-NEXT: s_nop 1
; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, s34
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s34
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 4
-; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 5
+; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4
+; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5
; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37]
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
@@ -211,26 +206,26 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: .LBB1_2: ; %merge
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s36, v0, 4
-; GFX9-O0-NEXT: v_readlane_b32 s37, v0, 5
+; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 4
+; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 5
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[36:37]
-; GFX9-O0-NEXT: v_readlane_b32 s38, v0, 0
-; GFX9-O0-NEXT: v_readlane_b32 s39, v0, 1
-; GFX9-O0-NEXT: v_readlane_b32 s34, v0, 2
-; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 3
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[36:37]
+; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 0
+; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 1
+; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 2
+; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 3
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[36:37]
; GFX9-O0-NEXT: s_mov_b32 s36, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s36, v3
+; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s36, v0
; GFX9-O0-NEXT: s_mov_b32 s36, 2
-; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s36
+; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s36
; GFX9-O0-NEXT: s_mov_b32 s40, s35
; GFX9-O0-NEXT: s_mov_b32 s36, s34
; GFX9-O0-NEXT: s_mov_b32 s34, s39
@@ -240,12 +235,11 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg)
; GFX9-O0-NEXT: s_mov_b32 s38, s35
; GFX9-O0-NEXT: s_mov_b32 s39, s34
; GFX9-O0-NEXT: s_mov_b32 s34, 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
@@ -353,9 +347,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
; GFX9-O0-NEXT: s_mov_b32 s48, s33
; GFX9-O0-NEXT: s_mov_b32 s33, s32
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x400
; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0
@@ -397,9 +391,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1
; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00
; GFX9-O0-NEXT: s_mov_b32 s33, s48
@@ -412,9 +406,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
; GFX9-O3-NEXT: s_mov_b32 s38, s33
; GFX9-O3-NEXT: s_mov_b32 s33, s32
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0
; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
@@ -435,9 +429,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1
; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-O3-NEXT: s_mov_b32 s33, s38
@@ -539,28 +533,26 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O0-LABEL: strict_wwm_call_i64:
; GFX9-O0: ; %bb.0:
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-O0-NEXT: s_mov_b32 s48, s33
+; GFX9-O0-NEXT: s_mov_b32 s46, s33
; GFX9-O0-NEXT: s_mov_b32 s33, s32
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000
-; GFX9-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0
; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1
; GFX9-O0-NEXT: s_mov_b32 s34, s8
@@ -578,10 +570,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O0-NEXT: s_mov_b32 s41, s45
; GFX9-O0-NEXT: s_mov_b32 s42, s44
; GFX9-O0-NEXT: s_mov_b32 s43, s35
-; GFX9-O0-NEXT: v_writelane_b32 v1, s40, 0
-; GFX9-O0-NEXT: v_writelane_b32 v1, s41, 1
-; GFX9-O0-NEXT: v_writelane_b32 v1, s42, 2
-; GFX9-O0-NEXT: v_writelane_b32 v1, s43, 3
+; GFX9-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v11, s40, 0
+; GFX9-O0-NEXT: v_writelane_b32 v11, s41, 1
+; GFX9-O0-NEXT: v_writelane_b32 v11, s42, 2
+; GFX9-O0-NEXT: v_writelane_b32 v11, s43, 3
; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35
; GFX9-O0-NEXT: s_mov_b32 s35, s9
; GFX9-O0-NEXT: ; kill: def $sgpr36_sgpr37 killed $sgpr34_sgpr35
@@ -599,11 +592,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O0-NEXT: ; implicit-def: $sgpr38_sgpr39
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: v_writelane_b32 v1, s34, 4
-; GFX9-O0-NEXT: v_writelane_b32 v1, s35, 5
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
+; GFX9-O0-NEXT: v_writelane_b32 v11, s34, 4
+; GFX9-O0-NEXT: v_writelane_b32 v11, s35, 5
; GFX9-O0-NEXT: v_mov_b32_e32 v3, s36
; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[34:35]
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
@@ -625,20 +615,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s34, v6, 4
-; GFX9-O0-NEXT: v_readlane_b32 s35, v6, 5
-; GFX9-O0-NEXT: v_readlane_b32 s36, v6, 0
-; GFX9-O0-NEXT: v_readlane_b32 s37, v6, 1
-; GFX9-O0-NEXT: v_readlane_b32 s38, v6, 2
-; GFX9-O0-NEXT: v_readlane_b32 s39, v6, 3
+; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 4
+; GFX9-O0-NEXT: v_readlane_b32 s35, v11, 5
+; GFX9-O0-NEXT: v_readlane_b32 s36, v11, 0
+; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 1
+; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 2
+; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 3
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
@@ -647,30 +630,28 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4
; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41]
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: s_mov_b32 s34, 0
-; GFX9-O0-NEXT: buffer_store_dwordx2 v[6:7], off, s[36:39], s34 offset:4
+; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1
; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000
-; GFX9-O0-NEXT: s_mov_b32 s33, s48
+; GFX9-O0-NEXT: s_mov_b32 s33, s46
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
;
@@ -680,14 +661,14 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O3-NEXT: s_mov_b32 s38, s33
; GFX9-O3-NEXT: s_mov_b32 s33, s32
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
-; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-O3-NEXT: s_nop 0
-; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
+; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0
; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
@@ -718,13 +699,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1
; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
-; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
-; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
+; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
; GFX9-O3-NEXT: s_mov_b32 s33, s38
@@ -924,7 +905,7 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
; GFX9-O0-NEXT: s_nop 0
; GFX9-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, -1
-; GFX9-O0-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
@@ -937,35 +918,35 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
; GFX9-O0-NEXT: v_writelane_b32 v47, s65, 1
; GFX9-O0-NEXT: v_writelane_b32 v47, s66, 2
; GFX9-O0-NEXT: v_writelane_b32 v47, s67, 3
-; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
@@ -975,36 +956,36 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4
; GFX9-O0-NEXT: v_mov_b32_e32 v35, s5
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s8
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s9
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s8
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s9
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s12
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s13
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s12
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s14
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s13
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s15
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s14
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s16
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s15
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s16
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s17
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v40, s18
; GFX9-O0-NEXT: v_mov_b32_e32 v39, s19
; GFX9-O0-NEXT: v_mov_b32_e32 v38, s20
; GFX9-O0-NEXT: v_mov_b32_e32 v37, s21
; GFX9-O0-NEXT: v_mov_b32_e32 v36, s22
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s23
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v46, s24
; GFX9-O0-NEXT: v_mov_b32_e32 v45, s25
; GFX9-O0-NEXT: v_mov_b32_e32 v44, s26
@@ -1013,56 +994,56 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
; GFX9-O0-NEXT: v_mov_b32_e32 v41, s29
; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v11, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v40
-; GFX9-O0-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
; GFX9-O0-NEXT: v_mov_b32_e32 v15, v39
-; GFX9-O0-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v38
-; GFX9-O0-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
; GFX9-O0-NEXT: v_mov_b32_e32 v17, v37
-; GFX9-O0-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
; GFX9-O0-NEXT: v_mov_b32_e32 v18, v36
-; GFX9-O0-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(5)
; GFX9-O0-NEXT: v_mov_b32_e32 v19, v35
-; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
; GFX9-O0-NEXT: v_mov_b32_e32 v20, v46
; GFX9-O0-NEXT: v_mov_b32_e32 v21, v45
; GFX9-O0-NEXT: v_mov_b32_e32 v22, v44
@@ -1080,23 +1061,23 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
; GFX9-O0-NEXT: v_mov_b32_e32 v30, v36
; GFX9-O0-NEXT: ; kill: def $vgpr31 killed $vgpr35 killed $exec
-; GFX9-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; implicit-def: $sgpr34
; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
@@ -1276,7 +1257,7 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
; GFX9-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, -1
-; GFX9-O0-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index ee9174822a9602..312628c7b5451e 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -114,15 +114,10 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: s_mov_b32 s19, 0xe00000
; GFX9-O0-NEXT: s_add_u32 s16, s16, s4
; GFX9-O0-NEXT: s_addc_u32 s17, s17, 0
-; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 0
+; GFX9-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 0
; GFX9-O0-NEXT: s_mov_b32 s4, s1
-; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 0
+; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 0
; GFX9-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; GFX9-O0-NEXT: s_mov_b32 s3, s1
; GFX9-O0-NEXT: s_mov_b32 s8, s3
@@ -135,37 +130,37 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: s_mov_b32 s5, s10
; GFX9-O0-NEXT: s_mov_b32 s6, s9
; GFX9-O0-NEXT: s_mov_b32 s7, s8
-; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 1
-; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 2
-; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 3
-; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 4
+; GFX9-O0-NEXT: v_writelane_b32 v5, s2, 1
+; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 2
+; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 3
+; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 4
; GFX9-O0-NEXT: s_mov_b32 s0, 0
; GFX9-O0-NEXT: s_nop 2
-; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], s0
+; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[4:7], s0
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[2:3]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[2:3]
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s0
; GFX9-O0-NEXT: s_nop 1
; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v3, s0
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 5
-; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 6
+; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 5
+; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 6
; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13]
; GFX9-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1]
@@ -188,26 +183,26 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: .LBB1_2: ; %merge
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 5
-; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 6
+; GFX9-O0-NEXT: v_readlane_b32 s4, v5, 5
+; GFX9-O0-NEXT: v_readlane_b32 s5, v5, 6
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-O0-NEXT: v_readlane_b32 s2, v0, 1
-; GFX9-O0-NEXT: v_readlane_b32 s3, v0, 2
-; GFX9-O0-NEXT: v_readlane_b32 s0, v0, 3
-; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 4
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
+; GFX9-O0-NEXT: v_readlane_b32 s2, v5, 1
+; GFX9-O0-NEXT: v_readlane_b32 s3, v5, 2
+; GFX9-O0-NEXT: v_readlane_b32 s0, v5, 3
+; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 4
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s4, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v3
+; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s4, v0
; GFX9-O0-NEXT: s_mov_b32 s4, 2
-; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s4
+; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s4
; GFX9-O0-NEXT: s_mov_b32 s6, s1
; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; GFX9-O0-NEXT: s_mov_b32 s4, s3
@@ -217,8 +212,7 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: s_mov_b32 s2, s5
; GFX9-O0-NEXT: s_mov_b32 s3, s4
; GFX9-O0-NEXT: s_mov_b32 s4, 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4
; GFX9-O0-NEXT: s_endpgm
;
; GFX9-O3-LABEL: cfg:
@@ -310,38 +304,32 @@ define hidden i32 @called(i32 %a) noinline {
define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-O0-LABEL: call:
; GFX9-O0: ; %bb.0:
-; GFX9-O0-NEXT: s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT: s_mov_b32 s32, 0
; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; GFX9-O0-NEXT: s_mov_b32 s26, -1
; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000
; GFX9-O0-NEXT: s_add_u32 s24, s24, s9
; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT: v_writelane_b32 v7, s10, 0
-; GFX9-O0-NEXT: v_writelane_b32 v7, s11, 1
+; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v3, s10, 0
+; GFX9-O0-NEXT: v_writelane_b32 v3, s11, 1
; GFX9-O0-NEXT: s_mov_b32 s14, s8
; GFX9-O0-NEXT: s_mov_b32 s13, s7
; GFX9-O0-NEXT: s_mov_b32 s12, s6
; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT: v_readlane_b32 s2, v7, 0
-; GFX9-O0-NEXT: v_readlane_b32 s3, v7, 1
-; GFX9-O0-NEXT: v_writelane_b32 v7, s4, 2
-; GFX9-O0-NEXT: v_writelane_b32 v7, s5, 3
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 0
+; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 1
+; GFX9-O0-NEXT: v_writelane_b32 v3, s4, 2
+; GFX9-O0-NEXT: v_writelane_b32 v3, s5, 3
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT: v_readlane_b32 s0, v7, 2
-; GFX9-O0-NEXT: v_readlane_b32 s1, v7, 3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
+; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 2
+; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 3
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX9-O0-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c
@@ -355,23 +343,19 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-O0-NEXT: s_mov_b32 s17, s7
; GFX9-O0-NEXT: s_mov_b32 s18, s6
; GFX9-O0-NEXT: s_mov_b32 s19, s3
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4
-; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5
-; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6
-; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7
+; GFX9-O0-NEXT: v_writelane_b32 v3, s16, 4
+; GFX9-O0-NEXT: v_writelane_b32 v3, s17, 5
+; GFX9-O0-NEXT: v_writelane_b32 v3, s18, 6
+; GFX9-O0-NEXT: v_writelane_b32 v3, s19, 7
; GFX9-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-O0-NEXT: v_writelane_b32 v1, s6, 8
+; GFX9-O0-NEXT: v_writelane_b32 v3, s6, 8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 9
-; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 10
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s6
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[2:3]
+; GFX9-O0-NEXT: v_writelane_b32 v3, s2, 9
+; GFX9-O0-NEXT: v_writelane_b32 v3, s3, 10
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3]
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 56
; GFX9-O0-NEXT: s_mov_b32 s2, s0
; GFX9-O0-NEXT: s_mov_b32 s0, s1
@@ -387,35 +371,28 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25]
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27]
; GFX9-O0-NEXT: s_mov_b32 s6, 20
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3
-; GFX9-O0-NEXT: s_mov_b32 s6, 10
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4
-; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3
+; GFX9-O0-NEXT: s_mov_b32 s6, 10
+; GFX9-O0-NEXT: v_lshlrev_b32_e64 v5, s6, v5
+; GFX9-O0-NEXT: v_or3_b32 v4, v6, v5, v4
; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr15
-; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v31, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s0, v1, 4
-; GFX9-O0-NEXT: v_readlane_b32 s1, v1, 5
-; GFX9-O0-NEXT: v_readlane_b32 s2, v1, 6
-; GFX9-O0-NEXT: v_readlane_b32 s3, v1, 7
-; GFX9-O0-NEXT: v_readlane_b32 s6, v1, 9
-; GFX9-O0-NEXT: v_readlane_b32 s7, v1, 10
-; GFX9-O0-NEXT: v_readlane_b32 s4, v1, 8
+; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 4
+; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 5
+; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 6
+; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 7
+; GFX9-O0-NEXT: v_readlane_b32 s6, v3, 9
+; GFX9-O0-NEXT: v_readlane_b32 s7, v3, 10
+; GFX9-O0-NEXT: v_readlane_b32 s4, v3, 8
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v6
+; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v7
; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4
; GFX9-O0-NEXT: s_endpgm
;
; GFX9-O3-LABEL: call:
@@ -559,37 +536,31 @@ define i64 @called_i64(i64 %a) noinline {
define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) {
; GFX9-O0-LABEL: call_i64:
; GFX9-O0: ; %bb.0:
-; GFX9-O0-NEXT: s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT: s_mov_b32 s32, 0
; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; GFX9-O0-NEXT: s_mov_b32 s26, -1
; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000
; GFX9-O0-NEXT: s_add_u32 s24, s24, s9
; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT: ; implicit-def: $vgpr12 : SGPR spill to VGPR lane
; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT: v_writelane_b32 v12, s10, 0
-; GFX9-O0-NEXT: v_writelane_b32 v12, s11, 1
+; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v8, s10, 0
+; GFX9-O0-NEXT: v_writelane_b32 v8, s11, 1
; GFX9-O0-NEXT: s_mov_b32 s14, s8
; GFX9-O0-NEXT: s_mov_b32 s13, s7
; GFX9-O0-NEXT: s_mov_b32 s12, s6
; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT: v_readlane_b32 s2, v12, 0
-; GFX9-O0-NEXT: v_readlane_b32 s3, v12, 1
-; GFX9-O0-NEXT: v_writelane_b32 v12, s4, 2
-; GFX9-O0-NEXT: v_writelane_b32 v12, s5, 3
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v12, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 0
+; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 1
+; GFX9-O0-NEXT: v_writelane_b32 v8, s4, 2
+; GFX9-O0-NEXT: v_writelane_b32 v8, s5, 3
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT: v_readlane_b32 s0, v12, 2
-; GFX9-O0-NEXT: v_readlane_b32 s1, v12, 3
+; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 2
+; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 3
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24
@@ -604,11 +575,10 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
; GFX9-O0-NEXT: s_mov_b32 s17, s8
; GFX9-O0-NEXT: s_mov_b32 s18, s7
; GFX9-O0-NEXT: s_mov_b32 s19, s6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4
-; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5
-; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6
-; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7
+; GFX9-O0-NEXT: v_writelane_b32 v8, s16, 4
+; GFX9-O0-NEXT: v_writelane_b32 v8, s17, 5
+; GFX9-O0-NEXT: v_writelane_b32 v8, s18, 6
+; GFX9-O0-NEXT: v_writelane_b32 v8, s19, 7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s15, s7
; GFX9-O0-NEXT: s_mov_b32 s8, s3
@@ -623,20 +593,17 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 8
-; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 9
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT: v_writelane_b32 v8, s2, 8
+; GFX9-O0-NEXT: v_writelane_b32 v8, s3, 9
; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3]
; GFX9-O0-NEXT: ; implicit-def: $sgpr2
; GFX9-O0-NEXT: ; implicit-def: $sgpr2
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
; GFX9-O0-NEXT: s_mov_b32 s2, 32
-; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s2, v[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
+; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s2, v[9:10]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 60
; GFX9-O0-NEXT: s_mov_b32 s2, s0
; GFX9-O0-NEXT: s_mov_b32 s0, s1
@@ -664,33 +631,25 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s0, v2, 4
-; GFX9-O0-NEXT: v_readlane_b32 s1, v2, 5
-; GFX9-O0-NEXT: v_readlane_b32 s2, v2, 6
-; GFX9-O0-NEXT: v_readlane_b32 s3, v2, 7
-; GFX9-O0-NEXT: v_readlane_b32 s4, v2, 8
-; GFX9-O0-NEXT: v_readlane_b32 s5, v2, 9
+; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 4
+; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 5
+; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 6
+; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 7
+; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8
+; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
; GFX9-O0-NEXT: v_add_co_u32_e64 v3, s[6:7], v3, v5
; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
; GFX9-O0-NEXT: s_mov_b32 s4, 0
-; GFX9-O0-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], s4 offset:4
; GFX9-O0-NEXT: s_endpgm
;
; GFX9-O3-LABEL: call_i64:
@@ -1007,15 +966,10 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: s_mov_b32 s19, 0xe00000
; GFX9-O0-NEXT: s_add_u32 s16, s16, s4
; GFX9-O0-NEXT: s_addc_u32 s17, s17, 0
-; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 0
+; GFX9-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 0
; GFX9-O0-NEXT: s_mov_b32 s4, s1
-; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 0
+; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 0
; GFX9-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3
; GFX9-O0-NEXT: s_mov_b32 s3, s1
; GFX9-O0-NEXT: s_mov_b32 s8, s3
@@ -1028,37 +982,37 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: s_mov_b32 s5, s10
; GFX9-O0-NEXT: s_mov_b32 s6, s9
; GFX9-O0-NEXT: s_mov_b32 s7, s8
-; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 1
-; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 2
-; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 3
-; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 4
+; GFX9-O0-NEXT: v_writelane_b32 v5, s2, 1
+; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 2
+; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 3
+; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 4
; GFX9-O0-NEXT: s_mov_b32 s0, 0
; GFX9-O0-NEXT: s_nop 2
-; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], s0
+; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[4:7], s0
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_nop 0
-; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill
; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3
; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, s0
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[2:3]
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[2:3]
; GFX9-O0-NEXT: v_mov_b32_e32 v2, s0
; GFX9-O0-NEXT: s_nop 1
; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf
; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2
; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3]
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v3, s0
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, s0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
+; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 s[0:1], exec
-; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 5
-; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 6
+; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 5
+; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 6
; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill
+; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13]
; GFX9-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1]
@@ -1081,26 +1035,26 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill
; GFX9-O0-NEXT: .LBB8_2: ; %merge
+; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload
+; GFX9-O0-NEXT: buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13]
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 5
-; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 6
+; GFX9-O0-NEXT: v_readlane_b32 s4, v5, 5
+; GFX9-O0-NEXT: v_readlane_b32 s5, v5, 6
; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX9-O0-NEXT: v_readlane_b32 s2, v0, 1
-; GFX9-O0-NEXT: v_readlane_b32 s3, v0, 2
-; GFX9-O0-NEXT: v_readlane_b32 s0, v0, 3
-; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 4
-; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v4
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5]
+; GFX9-O0-NEXT: v_readlane_b32 s2, v5, 1
+; GFX9-O0-NEXT: v_readlane_b32 s3, v5, 2
+; GFX9-O0-NEXT: v_readlane_b32 s0, v5, 3
+; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 4
+; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-O0-NEXT: s_mov_b32 s4, 1
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v3
+; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s4, v0
; GFX9-O0-NEXT: s_mov_b32 s4, 2
-; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s4
+; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s4
; GFX9-O0-NEXT: s_mov_b32 s6, s1
; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
; GFX9-O0-NEXT: s_mov_b32 s4, s3
@@ -1110,8 +1064,7 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) {
; GFX9-O0-NEXT: s_mov_b32 s2, s5
; GFX9-O0-NEXT: s_mov_b32 s3, s4
; GFX9-O0-NEXT: s_mov_b32 s4, 0
-; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4
; GFX9-O0-NEXT: s_endpgm
;
; GFX9-O3-LABEL: strict_wwm_cfg:
@@ -1203,38 +1156,32 @@ define hidden i32 @strict_wwm_called(i32 %a) noinline {
define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) {
; GFX9-O0-LABEL: strict_wwm_call:
; GFX9-O0: ; %bb.0:
-; GFX9-O0-NEXT: s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT: s_mov_b32 s32, 0
; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; GFX9-O0-NEXT: s_mov_b32 s26, -1
; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000
; GFX9-O0-NEXT: s_add_u32 s24, s24, s9
; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane
; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT: v_writelane_b32 v7, s10, 0
-; GFX9-O0-NEXT: v_writelane_b32 v7, s11, 1
+; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v3, s10, 0
+; GFX9-O0-NEXT: v_writelane_b32 v3, s11, 1
; GFX9-O0-NEXT: s_mov_b32 s14, s8
; GFX9-O0-NEXT: s_mov_b32 s13, s7
; GFX9-O0-NEXT: s_mov_b32 s12, s6
; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT: v_readlane_b32 s2, v7, 0
-; GFX9-O0-NEXT: v_readlane_b32 s3, v7, 1
-; GFX9-O0-NEXT: v_writelane_b32 v7, s4, 2
-; GFX9-O0-NEXT: v_writelane_b32 v7, s5, 3
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v7, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 0
+; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 1
+; GFX9-O0-NEXT: v_writelane_b32 v3, s4, 2
+; GFX9-O0-NEXT: v_writelane_b32 v3, s5, 3
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT: v_readlane_b32 s0, v7, 2
-; GFX9-O0-NEXT: v_readlane_b32 s1, v7, 3
-; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
-; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
+; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 2
+; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 3
+; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0
; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; GFX9-O0-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c
@@ -1248,23 +1195,19 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
; GFX9-O0-NEXT: s_mov_b32 s17, s7
; GFX9-O0-NEXT: s_mov_b32 s18, s6
; GFX9-O0-NEXT: s_mov_b32 s19, s3
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4
-; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5
-; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6
-; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7
+; GFX9-O0-NEXT: v_writelane_b32 v3, s16, 4
+; GFX9-O0-NEXT: v_writelane_b32 v3, s17, 5
+; GFX9-O0-NEXT: v_writelane_b32 v3, s18, 6
+; GFX9-O0-NEXT: v_writelane_b32 v3, s19, 7
; GFX9-O0-NEXT: s_mov_b32 s6, 0
-; GFX9-O0-NEXT: v_writelane_b32 v1, s6, 8
+; GFX9-O0-NEXT: v_writelane_b32 v3, s6, 8
; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 9
-; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 10
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, s6
-; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[2:3]
+; GFX9-O0-NEXT: v_writelane_b32 v3, s2, 9
+; GFX9-O0-NEXT: v_writelane_b32 v3, s3, 10
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3]
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 56
; GFX9-O0-NEXT: s_mov_b32 s2, s0
; GFX9-O0-NEXT: s_mov_b32 s0, s1
@@ -1280,35 +1223,28 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in
; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25]
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27]
; GFX9-O0-NEXT: s_mov_b32 s6, 20
-; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3
-; GFX9-O0-NEXT: s_mov_b32 s6, 10
; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4
-; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3
+; GFX9-O0-NEXT: s_mov_b32 s6, 10
+; GFX9-O0-NEXT: v_lshlrev_b32_e64 v5, s6, v5
+; GFX9-O0-NEXT: v_or3_b32 v4, v6, v5, v4
; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7
; GFX9-O0-NEXT: ; implicit-def: $sgpr15
-; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v31, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s0, v1, 4
-; GFX9-O0-NEXT: v_readlane_b32 s1, v1, 5
-; GFX9-O0-NEXT: v_readlane_b32 s2, v1, 6
-; GFX9-O0-NEXT: v_readlane_b32 s3, v1, 7
-; GFX9-O0-NEXT: v_readlane_b32 s6, v1, 9
-; GFX9-O0-NEXT: v_readlane_b32 s7, v1, 10
-; GFX9-O0-NEXT: v_readlane_b32 s4, v1, 8
+; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 4
+; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 5
+; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 6
+; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 7
+; GFX9-O0-NEXT: v_readlane_b32 s6, v3, 9
+; GFX9-O0-NEXT: v_readlane_b32 s7, v3, 10
+; GFX9-O0-NEXT: v_readlane_b32 s4, v3, 8
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v6
+; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v7
; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT: s_nop 0
+; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4
; GFX9-O0-NEXT: s_endpgm
;
; GFX9-O3-LABEL: strict_wwm_call:
@@ -1452,37 +1388,31 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline {
define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) {
; GFX9-O0-LABEL: strict_wwm_call_i64:
; GFX9-O0: ; %bb.0:
-; GFX9-O0-NEXT: s_mov_b32 s32, 0x400
+; GFX9-O0-NEXT: s_mov_b32 s32, 0
; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
; GFX9-O0-NEXT: s_mov_b32 s26, -1
; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000
; GFX9-O0-NEXT: s_add_u32 s24, s24, s9
; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0
-; GFX9-O0-NEXT: ; implicit-def: $vgpr12 : SGPR spill to VGPR lane
; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1
-; GFX9-O0-NEXT: v_writelane_b32 v12, s10, 0
-; GFX9-O0-NEXT: v_writelane_b32 v12, s11, 1
+; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane
+; GFX9-O0-NEXT: v_writelane_b32 v8, s10, 0
+; GFX9-O0-NEXT: v_writelane_b32 v8, s11, 1
; GFX9-O0-NEXT: s_mov_b32 s14, s8
; GFX9-O0-NEXT: s_mov_b32 s13, s7
; GFX9-O0-NEXT: s_mov_b32 s12, s6
; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3]
-; GFX9-O0-NEXT: v_readlane_b32 s2, v12, 0
-; GFX9-O0-NEXT: v_readlane_b32 s3, v12, 1
-; GFX9-O0-NEXT: v_writelane_b32 v12, s4, 2
-; GFX9-O0-NEXT: v_writelane_b32 v12, s5, 3
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v12, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 0
+; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 1
+; GFX9-O0-NEXT: v_writelane_b32 v8, s4, 2
+; GFX9-O0-NEXT: v_writelane_b32 v8, s5, 3
; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX9-O0-NEXT: v_readlane_b32 s0, v12, 2
-; GFX9-O0-NEXT: v_readlane_b32 s1, v12, 3
+; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 2
+; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 3
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0
; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24
@@ -1497,11 +1427,10 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
; GFX9-O0-NEXT: s_mov_b32 s17, s8
; GFX9-O0-NEXT: s_mov_b32 s18, s7
; GFX9-O0-NEXT: s_mov_b32 s19, s6
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4
-; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5
-; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6
-; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7
+; GFX9-O0-NEXT: v_writelane_b32 v8, s16, 4
+; GFX9-O0-NEXT: v_writelane_b32 v8, s17, 5
+; GFX9-O0-NEXT: v_writelane_b32 v8, s18, 6
+; GFX9-O0-NEXT: v_writelane_b32 v8, s19, 7
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0
; GFX9-O0-NEXT: s_mov_b32 s15, s7
; GFX9-O0-NEXT: s_mov_b32 s8, s3
@@ -1516,20 +1445,17 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9
; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1
-; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 8
-; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 9
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
+; GFX9-O0-NEXT: v_writelane_b32 v8, s2, 8
+; GFX9-O0-NEXT: v_writelane_b32 v8, s3, 9
; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6
; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3]
; GFX9-O0-NEXT: ; implicit-def: $sgpr2
; GFX9-O0-NEXT: ; implicit-def: $sgpr2
-; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7
-; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
+; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6
; GFX9-O0-NEXT: s_mov_b32 s2, 32
-; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s2, v[8:9]
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
+; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s2, v[9:10]
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 60
; GFX9-O0-NEXT: s_mov_b32 s2, s0
; GFX9-O0-NEXT: s_mov_b32 s0, s1
@@ -1557,33 +1483,25 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17]
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v2, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
-; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
-; GFX9-O0-NEXT: v_readlane_b32 s0, v2, 4
-; GFX9-O0-NEXT: v_readlane_b32 s1, v2, 5
-; GFX9-O0-NEXT: v_readlane_b32 s2, v2, 6
-; GFX9-O0-NEXT: v_readlane_b32 s3, v2, 7
-; GFX9-O0-NEXT: v_readlane_b32 s4, v2, 8
-; GFX9-O0-NEXT: v_readlane_b32 s5, v2, 9
+; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 4
+; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 5
+; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 6
+; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 7
+; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8
+; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0
-; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1
-; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload
-; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21]
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
; GFX9-O0-NEXT: ; implicit-def: $sgpr6
-; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
-; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
+; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10
; GFX9-O0-NEXT: v_add_co_u32_e64 v3, s[6:7], v3, v5
; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7]
; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
-; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4
; GFX9-O0-NEXT: s_mov_b32 s4, 0
-; GFX9-O0-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4
-; GFX9-O0-NEXT: ; kill: killed $vgpr0
+; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], s4 offset:4
; GFX9-O0-NEXT: s_endpgm
;
; GFX9-O3-LABEL: strict_wwm_call_i64:
More information about the llvm-commits
mailing list