[llvm] fd1cfc9 - [AMDGPU][GlobalISel] Fix waterfall loops
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 28 01:38:50 PDT 2021
Author: Sebastian Neubauer
Date: 2021-10-28T10:30:55+02:00
New Revision: fd1cfc90943d51c76082514f8e3c5a48a086f986
URL: https://github.com/llvm/llvm-project/commit/fd1cfc90943d51c76082514f8e3c5a48a086f986
DIFF: https://github.com/llvm/llvm-project/commit/fd1cfc90943d51c76082514f8e3c5a48a086f986.diff
LOG: [AMDGPU][GlobalISel] Fix waterfall loops
- Move the `s_and` of the exec mask to its correct position, before the
body of the waterfall loop
- Use the SI_WATERFALL_LOOP pseudo instruction, as the SelectionDAG path
does, to benefit from optimizations
- Add support for indirect function calls
To support indirect calls, add a G_SI_CALL instruction without register
class restrictions and insert a waterfall loop around the call when
applying register banks.
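For illustration, a minimal sketch of the kind of input this enables
(hypothetical IR in the spirit of the modified indirect-call tests, not
a test added by this patch): in a non-kernel function a pointer argument
arrives in VGPRs, so the callee address may be divergent and the call
has to be executed inside a waterfall loop.

  ; Hypothetical input: %fptr may differ per lane, so regbankselect
  ; wraps the resulting G_SI_CALL in a waterfall loop.
  define void @call_fptr(void ()* %fptr) {
    call void %fptr()
    ret void
  }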
Differential Revision: https://reviews.llvm.org/D109052
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
llvm/test/CodeGen/AMDGPU/indirect-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 9765751260ef9..118a32580f669 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -932,7 +932,9 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
bool IsTailCall) {
- return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::SI_CALL;
+ assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
+ "because the address can be divergent");
+ return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
}
// Add operands to call instruction to track the callee.
@@ -1062,6 +1064,11 @@ bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
if (!Info.IsTailCall)
return false;
+ // Indirect calls can't be tail calls, because the address can be divergent.
+ // TODO Check divergence info if the call really is divergent.
+ if (Info.Callee.isReg())
+ return false;
+
MachineFunction &MF = B.getMF();
const Function &CallerF = MF.getFunction();
CallingConv::ID CalleeCC = Info.CallConv;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 50e515d6c4fe1..28cb2fc57ac78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3249,6 +3249,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
return selectG_SBFX_UBFX(I);
+ case AMDGPU::G_SI_CALL:
+ I.setDesc(TII.get(AMDGPU::SI_CALL));
+ return true;
default:
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 430a97e6339e6..a697ce2f0dcbb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -734,23 +734,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const int OrigRangeSize = std::distance(Range.begin(), Range.end());
#endif
- for (MachineInstr &MI : Range) {
- for (MachineOperand &Def : MI.defs()) {
- if (MRI.use_nodbg_empty(Def.getReg()))
- continue;
-
- LLT ResTy = MRI.getType(Def.getReg());
- const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
- ResultRegs.push_back(Def.getReg());
- Register InitReg = B.buildUndef(ResTy).getReg(0);
- Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
- InitResultRegs.push_back(InitReg);
- PhiRegs.push_back(PhiReg);
- MRI.setRegBank(PhiReg, *DefBank);
- MRI.setRegBank(InitReg, *DefBank);
- }
- }
-
Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
@@ -894,23 +877,26 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
bool Is64 = OpSize % 64 == 0;
- LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
- unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
- : AMDGPU::V_CMP_EQ_U32_e64;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
+ unsigned UnmergeTySize = Is64 ? 64 : 32;
+ unsigned CmpOp =
+ Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64;
// Insert the unmerge before the loop.
B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
+ unsigned NumPieces = OpSize / UnmergeTySize;
+ SmallVector<Register, 8> UnmergePieces;
+ if (NumPieces == 1) {
+ UnmergePieces.push_back(OpReg);
+ } else {
+ LLT UnmergeTy = LLT::scalar(UnmergeTySize);
+ MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx)
+ UnmergePieces.push_back(Unmerge.getReg(PieceIdx));
+ }
B.setInstr(*I);
- unsigned NumPieces = Unmerge->getNumOperands() - 1;
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
- Register UnmergePiece = Unmerge.getReg(PieceIdx);
-
+ for (Register UnmergePiece : UnmergePieces) {
Register CurrentLaneOpReg;
if (Is64) {
Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
@@ -985,12 +971,14 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
if (OpTy.isVector()) {
auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
Op.setReg(Merge.getReg(0));
- } else {
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ } else if (ReadlanePieces.size() > 1) {
auto Merge = B.buildMerge(OpTy, ReadlanePieces);
Op.setReg(Merge.getReg(0));
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ } else {
+ Op.setReg(ReadlanePieces[0]);
}
-
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
}
// Make sure we don't re-process this register again.
@@ -998,8 +986,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
}
}
- B.setInsertPt(*LoopBB, LoopBB->end());
-
// Update EXEC, save the original EXEC value to VCC.
B.buildInstr(AndSaveExecOpc)
.addDef(NewExec)
@@ -1007,6 +993,8 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MRI.setSimpleHint(NewExec, CondReg);
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
B.buildInstr(XorTermOpc)
.addDef(ExecReg)
@@ -1017,8 +1005,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
- B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
- .addMBB(LoopBB);
+ B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
// Save the EXEC mask before the loop.
BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
@@ -1792,7 +1779,7 @@ static unsigned extractSWZ(unsigned CachePolicy) {
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
MachineInstr &MI) const {
- MachineRegisterInfo &MRI = *B.getMRI();
+ MachineRegisterInfo &MRI = *B.getMRI();
executeInWaterfallLoop(B, MI, MRI, {2, 4});
// FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
@@ -3136,6 +3123,101 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
break;
}
+ case AMDGPU::G_SI_CALL: {
+ // Use a set to avoid extra readfirstlanes in the case where multiple
+ // operands are the same register.
+ SmallSet<Register, 4> SGPROperandRegs;
+
+ if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, {1}))
+ break;
+
+ // Move all copies to physical SGPRs that are used by the call instruction
+ // into the loop block. Start searching for these copies until the
+ // ADJCALLSTACKUP.
+ unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
+ unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
+
+ // Move all non-copies before the copies, so that a complete range can be
+ // moved into the waterfall loop.
+ SmallVector<MachineInstr *, 4> NonCopyInstrs;
+ // Count of NonCopyInstrs found until the current LastCopy.
+ unsigned NonCopyInstrsLen = 0;
+ MachineBasicBlock::iterator Start(&MI);
+ MachineBasicBlock::iterator LastCopy = Start;
+ MachineBasicBlock *MBB = MI.getParent();
+ const SIMachineFunctionInfo *Info =
+ MBB->getParent()->getInfo<SIMachineFunctionInfo>();
+ while (Start->getOpcode() != FrameSetupOpcode) {
+ --Start;
+ bool IsCopy = false;
+ if (Start->getOpcode() == AMDGPU::COPY) {
+ auto &Dst = Start->getOperand(0);
+ if (Dst.isReg()) {
+ Register Reg = Dst.getReg();
+ if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {
+ IsCopy = true;
+ } else {
+ // Also move the copy from the scratch rsrc descriptor into the loop
+ // to allow it to be optimized away.
+ auto &Src = Start->getOperand(1);
+ if (Src.isReg()) {
+ Reg = Src.getReg();
+ IsCopy = Info->getScratchRSrcReg() == Reg;
+ }
+ }
+ }
+ }
+
+ if (IsCopy) {
+ LastCopy = Start;
+ NonCopyInstrsLen = NonCopyInstrs.size();
+ } else {
+ NonCopyInstrs.push_back(&*Start);
+ }
+ }
+ NonCopyInstrs.resize(NonCopyInstrsLen);
+
+ for (auto *NonCopy : reverse(NonCopyInstrs)) {
+ MBB->splice(LastCopy, MBB, NonCopy->getIterator());
+ }
+ Start = LastCopy;
+
+ // Do the same for copies after the loop
+ NonCopyInstrs.clear();
+ NonCopyInstrsLen = 0;
+ MachineBasicBlock::iterator End(&MI);
+ LastCopy = End;
+ while (End->getOpcode() != FrameDestroyOpcode) {
+ ++End;
+ bool IsCopy = false;
+ if (End->getOpcode() == AMDGPU::COPY) {
+ auto &Src = End->getOperand(1);
+ if (Src.isReg()) {
+ Register Reg = Src.getReg();
+ IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);
+ }
+ }
+
+ if (IsCopy) {
+ LastCopy = End;
+ NonCopyInstrsLen = NonCopyInstrs.size();
+ } else {
+ NonCopyInstrs.push_back(&*End);
+ }
+ }
+ NonCopyInstrs.resize(NonCopyInstrsLen);
+
+ End = LastCopy;
+ ++LastCopy;
+ for (auto *NonCopy : reverse(NonCopyInstrs)) {
+ MBB->splice(LastCopy, MBB, NonCopy->getIterator());
+ }
+
+ ++End;
+ MachineIRBuilder B(*Start);
+ executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs, MRI);
+ break;
+ }
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD: {
@@ -4506,6 +4588,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ case AMDGPU::G_SI_CALL: {
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
+ // Lie and claim everything is legal, even though some need to be
+ // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ // Allow anything for implicit arguments
+ for (unsigned I = 4; I < MI.getNumOperands(); ++I) {
+ if (MI.getOperand(I).isReg()) {
+ Register Reg = MI.getOperand(I).getReg();
+ auto OpBank = getRegBankID(Reg, MRI);
+ unsigned Size = getSizeInBits(Reg, MRI, *TRI);
+ OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
+ }
+ }
+ break;
+ }
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4c484b65c36bb..750d1981e7638 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2895,3 +2895,16 @@ def G_AMDGPU_INTRIN_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayLoad = 1;
let mayStore = 0;
}
+
+// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
+// if necessary.
+def G_SI_CALL : AMDGPUGenericInstruction {
+ let OutOperandList = (outs SReg_64:$dst);
+ let InOperandList = (ins type0:$src0, unknown:$callee);
+ let Size = 4;
+ let isCall = 1;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteBranch];
+ // TODO: Should really base this on the call target
+ let isConvergent = 1;
+}
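To make the "insert a waterfall loop if necessary" case above concrete,
here is a hedged sketch (hypothetical IR, not part of this patch) where
the callee address is divergent because each lane may pick a different
function pointer; getInstrMapping claims the callee operand is legal and
applyMappingImpl then wraps the G_SI_CALL in a waterfall loop:

  ; Hypothetical input: the loaded pointer depends on the workitem id,
  ; so the call target can differ per lane.
  declare void @a()
  declare void @b()
  @callees = addrspace(4) constant [2 x void ()*] [void ()* @a, void ()* @b]

  define amdgpu_kernel void @call_divergent_callee() {
    %id = call i32 @llvm.amdgcn.workitem.id.x()
    %slot = getelementptr inbounds [2 x void ()*], [2 x void ()*] addrspace(4)* @callees, i32 0, i32 %id
    %fptr = load void ()*, void ()* addrspace(4)* %slot
    call void %fptr()
    ret void
  }

  declare i32 @llvm.amdgcn.workitem.id.x()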
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
index b5e53eff2dbf1..f6c50b5379055 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,36 +9,37 @@ declare hidden void @extern()
define amdgpu_kernel void @kernel_call_no_workitem_ids() {
; CHECK-LABEL: name: kernel_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY9]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY10]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY11]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
- ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: S_ENDPGM 0
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY9]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY10]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY11]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
ret void
}
@@ -46,40 +47,41 @@ define amdgpu_kernel void @kernel_call_no_workitem_ids() {
define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
; CHECK-LABEL: name: kernel_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32)
- ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
- ; CHECK: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
- ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: S_ENDPGM 0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64)
+ ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
@@ -87,31 +89,32 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() {
define amdgpu_kernel void @kernel_call_no_other_sgprs() {
; CHECK-LABEL: name: kernel_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
- ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
- ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
- ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
- ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
- ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]]
- ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32)
- ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
- ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
- ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: S_ENDPGM 0
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C2]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]]
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
+ ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
@@ -119,37 +122,38 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() {
define void @func_call_no_workitem_ids() {
; CHECK-LABEL: name: func_call_no_workitem_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
- ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]]
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]]
- ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]]
- ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]]
- ; CHECK: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
- ; CHECK: $sgpr12 = COPY [[COPY12]](s32)
- ; CHECK: $sgpr13 = COPY [[COPY13]](s32)
- ; CHECK: $sgpr14 = COPY [[COPY14]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
- ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
- ; CHECK: S_SETPC_B64_return [[COPY16]]
+ ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
+ ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32)
+ ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32)
+ ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY16]]
call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"
ret void
}
@@ -157,31 +161,32 @@ define void @func_call_no_workitem_ids() {
define void @func_call_no_workgroup_ids() {
; CHECK-LABEL: name: func_call_no_workgroup_ids
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
- ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
- ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
- ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]]
- ; CHECK: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]]
- ; CHECK: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]]
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>)
- ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
- ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY8]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
- ; CHECK: $vgpr31 = COPY [[COPY10]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
- ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
- ; CHECK: S_SETPC_B64_return [[COPY12]]
+ ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]]
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+ ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4)
+ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY10]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY12]]
call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
@@ -189,22 +194,23 @@ define void @func_call_no_workgroup_ids() {
define void @func_call_no_other_sgprs() {
; CHECK-LABEL: name: func_call_no_other_sgprs
; CHECK: bb.1 (%ir-block.0):
- ; CHECK: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
- ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
- ; CHECK: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]]
- ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; CHECK: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
- ; CHECK: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
- ; CHECK: $vgpr31 = COPY [[COPY4]](s32)
- ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
- ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
- ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
- ; CHECK: S_SETPC_B64_return [[COPY6]]
+ ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
+ ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4)
+ ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32)
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]]
call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z"
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 6ce22ac206a0c..be5396ecdcd19 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -26,7 +26,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -56,7 +56,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_call_external_void_func_i32
@@ -76,7 +76,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -106,7 +106,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -129,7 +129,7 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -149,7 +149,7 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; GFX900-NEXT: S_SETPC_B64_return [[COPY18]]
@@ -168,7 +168,7 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -188,7 +188,7 @@ define void @test_func_call_external_void_func_i32() #0 {
; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; GFX908-NEXT: S_SETPC_B64_return [[COPY18]]
@@ -217,7 +217,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX900-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -282,7 +282,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_call_external_void_func_v32i32
@@ -303,7 +303,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX908-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -368,7 +368,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_v32i32(<32 x i32> zeroinitializer)
@@ -443,7 +443,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -498,7 +498,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[COPY33]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX900-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
; GFX900-NEXT: S_SETPC_B64_return [[COPY36]]
@@ -569,7 +569,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -624,7 +624,7 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[COPY33]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GFX908-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]]
; GFX908-NEXT: S_SETPC_B64_return [[COPY36]]
@@ -651,7 +651,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -681,7 +681,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_x
@@ -700,7 +700,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -730,7 +730,7 @@ define amdgpu_kernel void @test_only_workitem_id_x() #0 !reqd_work_group_size !0
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -754,7 +754,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -784,7 +784,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_y
@@ -803,7 +803,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -833,7 +833,7 @@ define amdgpu_kernel void @test_only_workitem_id_y() #0 !reqd_work_group_size !1
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -857,7 +857,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -887,7 +887,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_z
@@ -906,7 +906,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -936,7 +936,7 @@ define amdgpu_kernel void @test_only_workitem_id_z() #0 !reqd_work_group_size !2
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -960,7 +960,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -990,7 +990,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_xy
@@ -1009,7 +1009,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1039,7 +1039,7 @@ define amdgpu_kernel void @test_only_workitem_id_xy() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -1063,7 +1063,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1093,7 +1093,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_yz
@@ -1112,7 +1112,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1142,7 +1142,7 @@ define amdgpu_kernel void @test_only_workitem_id_yz() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -1166,7 +1166,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX900-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1196,7 +1196,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX900-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX900-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX900-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX900-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX900-NEXT: S_ENDPGM 0
; GFX908-LABEL: name: test_only_workitem_id_xz
@@ -1215,7 +1215,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GFX908-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1245,7 +1245,7 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
; GFX908-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GFX908-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GFX908-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GFX908-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GFX908-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
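(All of the check updates above follow the same pattern: the translated call now goes through the unconstrained G_SI_CALL pseudo, and the call-target pointer is left without an SGPR class so register bank assignment can decide where it lives. As a hedged illustration only — the function and argument names below are hypothetical, not taken from these tests — IR of this shape is what the change ultimately serves: an indirect call whose target pointer may end up in a VGPR, requiring a waterfall loop over the unique pointer values of the active lanes before the scalar call can be issued.)

; Minimal sketch, assuming a hypothetical caller; something like
;   llc -mtriple=amdgcn -global-isel
; would exercise this path. %fptr is divergent in general, so the
; eventual scalar call (S_SWAPPC_B64) must sit inside a waterfall loop
; that handles one unique call target at a time.
define void @call_indirect(void ()* %fptr) {
  call void %fptr()
  ret void
}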
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
index 15f8b6faf3d36..e097fcb90ffbd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll
@@ -15,10 +15,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
@@ -35,11 +35,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
@@ -56,11 +56,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
@@ -81,14 +81,14 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
@@ -111,14 +111,14 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 375c78ad0b439..c70a348a0cb8e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -86,7 +86,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -116,7 +116,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)*
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1)
@@ -137,11 +137,11 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)
; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32
; GCN-NEXT: $vgpr0 = COPY [[C]](s32)
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
@@ -168,7 +168,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -197,7 +197,7 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -216,10 +216,10 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 {
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -247,7 +247,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_zeroext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -276,7 +276,7 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -305,7 +305,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i1_signext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -334,7 +334,7 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i1_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -363,7 +363,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -392,7 +392,7 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -412,10 +412,10 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 {
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -444,7 +444,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_zeroext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -473,7 +473,7 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -503,7 +503,7 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i8_signext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -532,7 +532,7 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i8_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -562,7 +562,7 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -591,7 +591,7 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -618,7 +618,7 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_zeroext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -647,7 +647,7 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -676,7 +676,7 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i16_signext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -705,7 +705,7 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i16_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -734,7 +734,7 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -763,7 +763,7 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -781,10 +781,10 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 {
; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
@@ -811,7 +811,7 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -840,7 +840,7 @@ define amdgpu_kernel void @test_call_external_i48_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -869,7 +869,7 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_zeroext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -898,7 +898,7 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_zeroext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -929,7 +929,7 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i48_signext_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -958,7 +958,7 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i48_signext_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -989,7 +989,7 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i64_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i64_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1018,7 +1018,7 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -1046,7 +1046,7 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_p1_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p1_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1075,7 +1075,7 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -1103,7 +1103,7 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p1_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1132,7 +1132,7 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1164,7 +1164,7 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_p3_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_p3_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1193,7 +1193,7 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY19]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3)
@@ -1219,7 +1219,7 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2p3_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1248,7 +1248,7 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2p3_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(p3) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(p3) = COPY $vgpr1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY19]](p3), [[COPY20]](p3)
@@ -1276,7 +1276,7 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1305,7 +1305,7 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -1332,7 +1332,7 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1361,7 +1361,7 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY19]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1)
@@ -1387,7 +1387,7 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_f64_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_f64_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1416,7 +1416,7 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY19]](s32), [[COPY20]](s32)
@@ -1444,7 +1444,7 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f64_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1473,7 +1473,7 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1505,7 +1505,7 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1534,7 +1534,7 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY19]](s32), [[COPY20]](s32)
@@ -1562,7 +1562,7 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1591,7 +1591,7 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1620,7 +1620,7 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1649,7 +1649,7 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1679,7 +1679,7 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1708,7 +1708,7 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1739,7 +1739,7 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v8i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1768,7 +1768,7 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v8i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1802,7 +1802,7 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v16i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1831,7 +1831,7 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v16i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1873,7 +1873,7 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1902,7 +1902,7 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7, implicit-def $vgpr8, implicit-def $vgpr9, implicit-def $vgpr10, implicit-def $vgpr11, implicit-def $vgpr12, implicit-def $vgpr13, implicit-def $vgpr14, implicit-def $vgpr15, implicit-def $vgpr16, implicit-def $vgpr17, implicit-def $vgpr18, implicit-def $vgpr19, implicit-def $vgpr20, implicit-def $vgpr21, implicit-def $vgpr22, implicit-def $vgpr23, implicit-def $vgpr24, implicit-def $vgpr25, implicit-def $vgpr26, implicit-def $vgpr27, implicit-def $vgpr28, implicit-def $vgpr29, implicit-def $vgpr30, implicit-def $vgpr31
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -1960,7 +1960,7 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2i16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -1989,7 +1989,7 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1)
@@ -2015,7 +2015,7 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3i16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2044,7 +2044,7 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -2074,7 +2074,7 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4i16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2103,7 +2103,7 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
@@ -2131,7 +2131,7 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v2f16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2160,7 +2160,7 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[COPY19]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1)
@@ -2186,7 +2186,7 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2215,7 +2215,7 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -2245,7 +2245,7 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v4f16_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2274,7 +2274,7 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v4f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>)
@@ -2302,7 +2302,7 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v3f32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2331,7 +2331,7 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2360,7 +2360,7 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v5f32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2389,7 +2389,7 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v5f32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2422,7 +2422,7 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2451,7 +2451,7 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2477,10 +2477,10 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 {
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2
@@ -2514,7 +2514,7 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a2i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2543,7 +2543,7 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a2i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
@@ -2574,7 +2574,7 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_a5i8_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2603,7 +2603,7 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_a5i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32)
; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16)
@@ -2658,7 +2658,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2688,7 +2688,7 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5)
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
@@ -2723,7 +2723,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2753,7 +2753,7 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5)
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
@@ -2787,7 +2787,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_void
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
@@ -2817,7 +2817,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 {
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY14]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
@@ -2850,7 +2850,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4)
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2884,7 +2884,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5)
; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index b3b5f4f50331b..6ca94d551fbd8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -30,7 +30,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.gep01, addrspace 5)
; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -65,7 +65,7 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 81d21c03c943b..bd068cd3b45e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -110,7 +110,7 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -139,7 +139,7 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_void()
@@ -153,10 +153,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
@@ -179,7 +179,7 @@ define void @test_func_call_external_void_func_void() #0 {
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -198,7 +198,7 @@ define void @test_func_call_external_void_func_void() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY18]]
@@ -223,7 +223,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_struct
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -253,7 +253,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_struct() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_struct, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_empty_struct({} zeroinitializer, i32 23)
@@ -277,7 +277,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_empty_array
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -307,7 +307,7 @@ define amdgpu_kernel void @test_call_external_void_func_empty_array() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_empty_array, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_empty_array([0 x i8] zeroinitializer, i32 23)
@@ -331,7 +331,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -362,7 +362,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i1(i1 true)
@@ -388,7 +388,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -419,7 +419,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i1, i1 addrspace(1)* undef
@@ -446,7 +446,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile load (s1) from `i1 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -477,7 +477,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i1, i1 addrspace(1)* undef
@@ -503,7 +503,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 123
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -535,7 +535,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i8(i8 123)
@@ -561,7 +561,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -593,7 +593,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i8, i8 addrspace(1)* undef
@@ -620,7 +620,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -652,7 +652,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i8, i8 addrspace(1)* undef
@@ -677,7 +677,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 123
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -708,7 +708,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i16(i16 123)
@@ -734,7 +734,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -765,7 +765,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i16, i16 addrspace(1)* undef
@@ -792,7 +792,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -823,7 +823,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i16, i16 addrspace(1)* undef
@@ -849,7 +849,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -879,7 +879,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i32(i32 42)
@@ -895,11 +895,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 {
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
@@ -916,11 +916,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg
; CHECK-NEXT: $sgpr4 = COPY [[C]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
@@ -945,7 +945,7 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 123
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -977,7 +977,7 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_i64(i64 123)
@@ -1002,7 +1002,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1036,7 +1036,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i64>, <2 x i64> addrspace(1)* null
@@ -1063,7 +1063,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1097,7 +1097,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
@@ -1123,7 +1123,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1156,7 +1156,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, i48 addrspace(1)* undef
@@ -1183,7 +1183,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1216,7 +1216,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, i48 addrspace(1)* undef
@@ -1243,7 +1243,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1276,7 +1276,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%var = load volatile i48, i48 addrspace(1)* undef
@@ -1302,7 +1302,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 {
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_p0
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1334,7 +1334,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_p0(i8* %arg)
@@ -1359,7 +1359,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: (load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1393,7 +1393,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i8*>, <2 x i8*> addrspace(1)* null
@@ -1423,7 +1423,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1459,7 +1459,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, <2 x i64> addrspace(1)* null
@@ -1491,7 +1491,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1529,7 +1529,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, <2 x i64> addrspace(1)* null
@@ -1555,7 +1555,7 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH4400
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1586,7 +1586,7 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f16(half 4.0)
@@ -1610,7 +1610,7 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1640,7 +1640,7 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f32(float 4.0)
@@ -1666,7 +1666,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1698,7 +1698,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
@@ -1725,7 +1725,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 4.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1758,7 +1758,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
@@ -1787,7 +1787,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5f32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1822,7 +1822,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
@@ -1846,7 +1846,7 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_f64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_f64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1878,7 +1878,7 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_f64(double 4.0)
@@ -1904,7 +1904,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 4.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -1938,7 +1938,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
@@ -1965,7 +1965,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_FCONSTANT double 8.000000e+00
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64), [[C2]](s64)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f64
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2001,7 +2001,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f64, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
@@ -2026,7 +2026,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2056,7 +2056,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i16>, <2 x i16> addrspace(1)* undef
@@ -2082,7 +2082,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2116,7 +2116,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <3 x i16>, <3 x i16> addrspace(1)* undef
@@ -2142,7 +2142,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2176,7 +2176,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <3 x half>, <3 x half> addrspace(1)* undef
@@ -2202,7 +2202,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2234,7 +2234,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <4 x i16>, <4 x i16> addrspace(1)* undef
@@ -2263,7 +2263,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C1]](s16), [[C2]](s16), [[C3]](s16)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2295,7 +2295,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
@@ -2320,7 +2320,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: (load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2355,7 +2355,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <5 x i16>, <5 x i16> addrspace(1)* undef
@@ -2381,7 +2381,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: (load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2417,7 +2417,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <7 x i16>, <7 x i16> addrspace(1)* undef
@@ -2443,7 +2443,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: (load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2510,7 +2510,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <63 x i16>, <63 x i16> addrspace(1)* undef
@@ -2536,7 +2536,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: (load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2606,7 +2606,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <65 x i16>, <65 x i16> addrspace(1)* undef
@@ -2632,7 +2632,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: (load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2700,7 +2700,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <66 x i16>, <66 x i16> addrspace(1)* undef
@@ -2726,7 +2726,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2756,7 +2756,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x half>, <2 x half> addrspace(1)* undef
@@ -2782,7 +2782,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2814,7 +2814,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <2 x i32>, <2 x i32> addrspace(1)* undef
@@ -2841,7 +2841,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2873,7 +2873,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
@@ -2901,7 +2901,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2934,7 +2934,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
@@ -2963,7 +2963,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i32_i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -2997,7 +2997,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
@@ -3022,7 +3022,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3056,7 +3056,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = load <4 x i32>, <4 x i32> addrspace(1)* undef
@@ -3085,7 +3085,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3119,7 +3119,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
@@ -3148,7 +3148,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3183,7 +3183,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
@@ -3209,7 +3209,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3247,7 +3247,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
@@ -3281,7 +3281,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3319,7 +3319,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
@@ -3345,7 +3345,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3391,7 +3391,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
@@ -3419,7 +3419,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3484,7 +3484,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3515,7 +3515,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3583,7 +3583,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3616,7 +3616,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3692,7 +3692,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3727,7 +3727,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: (load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: (load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3798,7 +3798,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
@@ -3831,7 +3831,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_struct_i8_i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3864,7 +3864,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
@@ -3886,14 +3886,14 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 {
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
@@ -3916,14 +3916,14 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr0 + 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16)
; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY2]]
@@ -3956,7 +3956,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.gep01, addrspace 5)
; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -3990,7 +3990,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%val = alloca { i8, i32 }, align 4, addrspace(5)
@@ -4022,7 +4022,7 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -4051,7 +4051,7 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming
; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY21]]
@@ -4079,7 +4079,7 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)*
; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -4103,7 +4103,7 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)*
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc
; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY20]]
@@ -4130,7 +4130,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<2 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -4166,7 +4166,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)* addrspace(4)* undef
@@ -4194,7 +4194,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s8>) from %ir.ptr, align 4, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -4233,7 +4233,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <3 x i8> addrspace(1)*, <3 x i8> addrspace(1)* addrspace(4)* undef
@@ -4261,7 +4261,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -4303,7 +4303,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
@@ -4331,7 +4331,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -4385,7 +4385,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <8 x i8> addrspace(1)*, <8 x i8> addrspace(1)* addrspace(4)* undef
@@ -4413,7 +4413,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -4491,7 +4491,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
@@ -4521,7 +4521,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
@@ -4593,7 +4593,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @stack_passed_f64_arg, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
entry:
@@ -4644,7 +4644,7 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -4722,7 +4722,7 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc
; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY19]]
@@ -4786,7 +4786,7 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -4864,7 +4864,7 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc
; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY19]]
@@ -4924,7 +4924,7 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -5010,7 +5010,7 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY19]]
@@ -5066,7 +5066,7 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+01
; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
+ ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
@@ -5152,7 +5152,7 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc
; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY19]]
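
(Note on the irtranslator-call.ll hunks above: each one follows the same two-part pattern — the target pseudo SI_CALL becomes the new generic G_SI_CALL, and the G_GLOBAL_VALUE producing the callee pointer drops its sreg_64 register-class constraint, leaving bank assignment to RegBankSelect. A minimal sketch of the resulting generic MIR, with illustrative virtual-register names that are not taken from any test:

  %gv:_(p0) = G_GLOBAL_VALUE @callee
  $sgpr30_sgpr31 = G_SI_CALL %gv(p0), @callee, csr_amdgpu_highregs, implicit $vgpr0
)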
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
index f98a1003e6f8a..ee15b1a0249f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -17,7 +17,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sreg_64(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4)
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -47,7 +47,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32)
; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32)
; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[LOAD]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
call void %fptr()
@@ -61,12 +61,12 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>)
- ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+ ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]]
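
(For indirect calls, the hunks above show the same G_SI_CALL pseudo emitted with a null (0) callee operand; the function pointer — whether loaded from the kernarg segment or merged from two VGPRs — likewise loses its sreg_64 constraint. A minimal sketch, register names hypothetical:

  %fptr:_(p0) = G_LOAD %kernarg(p4) :: (dereferenceable invariant load (p0), addrspace 4)
  $sgpr30_sgpr31 = G_SI_CALL %fptr(p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3
)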
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 51a214550efe2..518f3ff80cc36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -154,12 +154,12 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a,
; GCN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64)
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[EVEC]](s32)
; GCN-NEXT: $vgpr1 = COPY [[EVEC1]](s32)
; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: S_ENDPGM 0
@@ -199,7 +199,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
@@ -208,7 +208,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc
; GCN-NEXT: $vgpr0 = COPY [[COPY6]](s32)
@@ -534,7 +534,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
@@ -578,7 +578,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: $vgpr30 = COPY [[C]](s32)
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc
; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32)
@@ -600,12 +600,12 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i3
; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
- ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
+ ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
- ; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+ ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index 1748f3eddc5e2..66c07d41f5991 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -147,10 +147,17 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) {
; GFX1030-LABEL: image_bvh_intersect_ray_vgpr_descr:
; GFX1030: ; %bb.0:
-; GFX1030-NEXT: v_mov_b32_e32 v5, v0
-; GFX1030-NEXT: v_mov_b32_e32 v9, v1
-; GFX1030-NEXT: v_mov_b32_e32 v13, v2
-; GFX1030-NEXT: v_mov_b32_e32 v18, v3
+; GFX1030-NEXT: v_mov_b32_e32 v18, v0
+; GFX1030-NEXT: v_mov_b32_e32 v19, v1
+; GFX1030-NEXT: v_mov_b32_e32 v20, v2
+; GFX1030-NEXT: v_mov_b32_e32 v21, v3
+; GFX1030-NEXT: v_mov_b32_e32 v22, v4
+; GFX1030-NEXT: v_mov_b32_e32 v23, v6
+; GFX1030-NEXT: v_mov_b32_e32 v24, v7
+; GFX1030-NEXT: v_mov_b32_e32 v25, v8
+; GFX1030-NEXT: v_mov_b32_e32 v26, v10
+; GFX1030-NEXT: v_mov_b32_e32 v27, v11
+; GFX1030-NEXT: v_mov_b32_e32 v28, v12
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
; GFX1030-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GFX1030-NEXT: v_readfirstlane_b32 s4, v14
@@ -158,10 +165,23 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
; GFX1030-NEXT: v_readfirstlane_b32 s6, v16
; GFX1030-NEXT: v_readfirstlane_b32 s7, v17
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[14:15]
-; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], [v5, v9, v13, v18, v4, v6, v7, v8, v10, v11, v12], s[4:7]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[18:33], s[4:7]
+; GFX1030-NEXT: ; implicit-def: $vgpr14_vgpr15
+; GFX1030-NEXT: ; implicit-def: $vgpr18
+; GFX1030-NEXT: ; implicit-def: $vgpr19
+; GFX1030-NEXT: ; implicit-def: $vgpr20
+; GFX1030-NEXT: ; implicit-def: $vgpr21
+; GFX1030-NEXT: ; implicit-def: $vgpr22
+; GFX1030-NEXT: ; implicit-def: $vgpr23
+; GFX1030-NEXT: ; implicit-def: $vgpr24
+; GFX1030-NEXT: ; implicit-def: $vgpr25
+; GFX1030-NEXT: ; implicit-def: $vgpr26
+; GFX1030-NEXT: ; implicit-def: $vgpr27
+; GFX1030-NEXT: ; implicit-def: $vgpr28
+; GFX1030-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16_vgpr17
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz BB6_1
; GFX1030-NEXT: ; %bb.2:
@@ -177,26 +197,32 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
; GFX1013-NEXT: v_mov_b32_e32 v8, v10
; GFX1013-NEXT: v_mov_b32_e32 v9, v11
; GFX1013-NEXT: v_mov_b32_e32 v10, v12
+; GFX1013-NEXT: v_mov_b32_e32 v18, v14
+; GFX1013-NEXT: v_mov_b32_e32 v19, v15
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v14
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v15
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v18
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v19
; GFX1013-NEXT: v_readfirstlane_b32 s6, v16
; GFX1013-NEXT: v_readfirstlane_b32 s7, v17
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[14:15]
-; GFX1013-NEXT: image_bvh_intersect_ray v[18:21], v[0:15], s[4:7]
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1013-NEXT: image_bvh_intersect_ray v[20:23], v[0:15], s[4:7]
+; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+; GFX1013-NEXT: ; implicit-def: $vgpr18_vgpr19
+; GFX1013-NEXT: ; implicit-def: $vgpr14_vgpr15_vgpr16_vgpr17
+; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz BB6_1
; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)
-; GFX1013-NEXT: v_mov_b32_e32 v0, v18
-; GFX1013-NEXT: v_mov_b32_e32 v1, v19
-; GFX1013-NEXT: v_mov_b32_e32 v2, v20
-; GFX1013-NEXT: v_mov_b32_e32 v3, v21
+; GFX1013-NEXT: v_mov_b32_e32 v0, v20
+; GFX1013-NEXT: v_mov_b32_e32 v1, v21
+; GFX1013-NEXT: v_mov_b32_e32 v2, v22
+; GFX1013-NEXT: v_mov_b32_e32 v3, v23
; GFX1013-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
%r = bitcast <4 x i32> %v to <4 x float>
@@ -207,30 +233,41 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
; GFX1030-LABEL: image_bvh_intersect_ray_a16_vgpr_descr:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_mov_b32 s0, 0xffff
-; GFX1030-NEXT: v_mov_b32_e32 v5, v0
-; GFX1030-NEXT: v_mov_b32_e32 v14, v1
+; GFX1030-NEXT: v_mov_b32_e32 v14, v0
+; GFX1030-NEXT: v_mov_b32_e32 v15, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v6
; GFX1030-NEXT: v_and_b32_e32 v1, s0, v8
-; GFX1030-NEXT: v_mov_b32_e32 v15, v2
-; GFX1030-NEXT: v_mov_b32_e32 v16, v3
+; GFX1030-NEXT: v_mov_b32_e32 v16, v2
+; GFX1030-NEXT: v_mov_b32_e32 v17, v3
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 16, v8
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX1030-NEXT: v_and_b32_e32 v3, s0, v9
+; GFX1030-NEXT: v_mov_b32_e32 v18, v4
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
-; GFX1030-NEXT: v_and_or_b32 v6, v6, s0, v0
-; GFX1030-NEXT: v_and_or_b32 v7, v7, s0, v1
-; GFX1030-NEXT: v_lshl_or_b32 v8, v3, 16, v2
+; GFX1030-NEXT: v_and_or_b32 v19, v6, s0, v0
+; GFX1030-NEXT: v_and_or_b32 v20, v7, s0, v1
+; GFX1030-NEXT: v_lshl_or_b32 v21, v3, 16, v2
; GFX1030-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; GFX1030-NEXT: v_readfirstlane_b32 s4, v10
; GFX1030-NEXT: v_readfirstlane_b32 s5, v11
; GFX1030-NEXT: v_readfirstlane_b32 s6, v12
; GFX1030-NEXT: v_readfirstlane_b32 s7, v13
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
-; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], [v5, v14, v15, v16, v4, v6, v7, v8], s[4:7] a16
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1030-NEXT: image_bvh_intersect_ray v[0:3], v[14:21], s[4:7] a16
+; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX1030-NEXT: ; implicit-def: $vgpr14
+; GFX1030-NEXT: ; implicit-def: $vgpr15
+; GFX1030-NEXT: ; implicit-def: $vgpr16
+; GFX1030-NEXT: ; implicit-def: $vgpr17
+; GFX1030-NEXT: ; implicit-def: $vgpr18
+; GFX1030-NEXT: ; implicit-def: $vgpr19
+; GFX1030-NEXT: ; implicit-def: $vgpr20
+; GFX1030-NEXT: ; implicit-def: $vgpr21
+; GFX1030-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz BB7_1
; GFX1030-NEXT: ; %bb.2:
@@ -257,10 +294,14 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
; GFX1013-NEXT: v_readfirstlane_b32 s6, v12
; GFX1013-NEXT: v_readfirstlane_b32 s7, v13
; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
-; GFX1013-NEXT: image_bvh_intersect_ray v[14:17], v[0:7], s[4:7] a16
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1013-NEXT: image_bvh_intersect_ray v[14:17], v[0:7], s[4:7] a16
+; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11
+; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+; GFX1013-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13
+; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz BB7_1
; GFX1013-NEXT: ; %bb.2:
@@ -279,10 +320,18 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr) {
; GFX1030-LABEL: image_bvh64_intersect_ray_vgpr_descr:
; GFX1030: ; %bb.0:
-; GFX1030-NEXT: v_mov_b32_e32 v6, v0
-; GFX1030-NEXT: v_mov_b32_e32 v10, v1
-; GFX1030-NEXT: v_mov_b32_e32 v14, v2
-; GFX1030-NEXT: v_mov_b32_e32 v19, v3
+; GFX1030-NEXT: v_mov_b32_e32 v19, v0
+; GFX1030-NEXT: v_mov_b32_e32 v20, v1
+; GFX1030-NEXT: v_mov_b32_e32 v21, v2
+; GFX1030-NEXT: v_mov_b32_e32 v22, v3
+; GFX1030-NEXT: v_mov_b32_e32 v23, v4
+; GFX1030-NEXT: v_mov_b32_e32 v24, v5
+; GFX1030-NEXT: v_mov_b32_e32 v25, v7
+; GFX1030-NEXT: v_mov_b32_e32 v26, v8
+; GFX1030-NEXT: v_mov_b32_e32 v27, v9
+; GFX1030-NEXT: v_mov_b32_e32 v28, v11
+; GFX1030-NEXT: v_mov_b32_e32 v29, v12
+; GFX1030-NEXT: v_mov_b32_e32 v30, v13
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
; GFX1030-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GFX1030-NEXT: v_readfirstlane_b32 s4, v15
@@ -290,10 +339,24 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
; GFX1030-NEXT: v_readfirstlane_b32 s6, v17
; GFX1030-NEXT: v_readfirstlane_b32 s7, v18
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[15:16]
-; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], [v6, v10, v14, v19, v4, v5, v7, v8, v9, v11, v12, v13], s[4:7]
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[19:34], s[4:7]
+; GFX1030-NEXT: ; implicit-def: $vgpr15_vgpr16
+; GFX1030-NEXT: ; implicit-def: $vgpr19
+; GFX1030-NEXT: ; implicit-def: $vgpr20
+; GFX1030-NEXT: ; implicit-def: $vgpr21
+; GFX1030-NEXT: ; implicit-def: $vgpr22
+; GFX1030-NEXT: ; implicit-def: $vgpr23
+; GFX1030-NEXT: ; implicit-def: $vgpr24
+; GFX1030-NEXT: ; implicit-def: $vgpr25
+; GFX1030-NEXT: ; implicit-def: $vgpr26
+; GFX1030-NEXT: ; implicit-def: $vgpr27
+; GFX1030-NEXT: ; implicit-def: $vgpr28
+; GFX1030-NEXT: ; implicit-def: $vgpr29
+; GFX1030-NEXT: ; implicit-def: $vgpr30
+; GFX1030-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz BB8_1
; GFX1030-NEXT: ; %bb.2:
@@ -309,26 +372,32 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
; GFX1013-NEXT: v_mov_b32_e32 v9, v11
; GFX1013-NEXT: v_mov_b32_e32 v10, v12
; GFX1013-NEXT: v_mov_b32_e32 v11, v13
+; GFX1013-NEXT: v_mov_b32_e32 v19, v15
+; GFX1013-NEXT: v_mov_b32_e32 v20, v16
; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v15
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v16
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v19
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v20
; GFX1013-NEXT: v_readfirstlane_b32 s6, v17
; GFX1013-NEXT: v_readfirstlane_b32 s7, v18
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[15:16]
-; GFX1013-NEXT: image_bvh64_intersect_ray v[19:22], v[0:15], s[4:7]
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[19:20]
; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1013-NEXT: image_bvh64_intersect_ray v[21:24], v[0:15], s[4:7]
+; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+; GFX1013-NEXT: ; implicit-def: $vgpr19_vgpr20
+; GFX1013-NEXT: ; implicit-def: $vgpr15_vgpr16_vgpr17_vgpr18
+; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz BB8_1
; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)
-; GFX1013-NEXT: v_mov_b32_e32 v0, v19
-; GFX1013-NEXT: v_mov_b32_e32 v1, v20
-; GFX1013-NEXT: v_mov_b32_e32 v2, v21
-; GFX1013-NEXT: v_mov_b32_e32 v3, v22
+; GFX1013-NEXT: v_mov_b32_e32 v0, v21
+; GFX1013-NEXT: v_mov_b32_e32 v1, v22
+; GFX1013-NEXT: v_mov_b32_e32 v2, v23
+; GFX1013-NEXT: v_mov_b32_e32 v3, v24
; GFX1013-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x float> %ray_dir, <4 x float> %ray_inv_dir, <4 x i32> %tdescr)
%r = bitcast <4 x i32> %v to <4 x float>
@@ -339,30 +408,43 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
; GFX1030-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX1030: ; %bb.0:
; GFX1030-NEXT: s_mov_b32 s0, 0xffff
-; GFX1030-NEXT: v_mov_b32_e32 v6, v0
-; GFX1030-NEXT: v_mov_b32_e32 v15, v1
+; GFX1030-NEXT: v_mov_b32_e32 v15, v0
+; GFX1030-NEXT: v_mov_b32_e32 v16, v1
; GFX1030-NEXT: v_lshrrev_b32_e32 v0, 16, v7
; GFX1030-NEXT: v_and_b32_e32 v1, s0, v9
-; GFX1030-NEXT: v_mov_b32_e32 v16, v2
-; GFX1030-NEXT: v_mov_b32_e32 v17, v3
+; GFX1030-NEXT: v_mov_b32_e32 v17, v2
+; GFX1030-NEXT: v_mov_b32_e32 v18, v3
; GFX1030-NEXT: v_lshrrev_b32_e32 v2, 16, v9
; GFX1030-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; GFX1030-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX1030-NEXT: v_and_b32_e32 v3, s0, v10
+; GFX1030-NEXT: v_mov_b32_e32 v19, v4
+; GFX1030-NEXT: v_mov_b32_e32 v20, v5
+; GFX1030-NEXT: v_and_or_b32 v21, v7, s0, v0
+; GFX1030-NEXT: v_and_or_b32 v22, v8, s0, v1
+; GFX1030-NEXT: v_lshl_or_b32 v23, v3, 16, v2
; GFX1030-NEXT: s_mov_b32 s1, exec_lo
-; GFX1030-NEXT: v_and_or_b32 v7, v7, s0, v0
-; GFX1030-NEXT: v_and_or_b32 v8, v8, s0, v1
-; GFX1030-NEXT: v_lshl_or_b32 v9, v3, 16, v2
; GFX1030-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GFX1030-NEXT: v_readfirstlane_b32 s4, v11
; GFX1030-NEXT: v_readfirstlane_b32 s5, v12
; GFX1030-NEXT: v_readfirstlane_b32 s6, v13
; GFX1030-NEXT: v_readfirstlane_b32 s7, v14
; GFX1030-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
-; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], [v6, v15, v16, v17, v4, v5, v7, v8, v9], s[4:7] a16
; GFX1030-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GFX1030-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1030-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[15:30], s[4:7] a16
+; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12
+; GFX1030-NEXT: ; implicit-def: $vgpr15
+; GFX1030-NEXT: ; implicit-def: $vgpr16
+; GFX1030-NEXT: ; implicit-def: $vgpr17
+; GFX1030-NEXT: ; implicit-def: $vgpr18
+; GFX1030-NEXT: ; implicit-def: $vgpr19
+; GFX1030-NEXT: ; implicit-def: $vgpr20
+; GFX1030-NEXT: ; implicit-def: $vgpr21
+; GFX1030-NEXT: ; implicit-def: $vgpr22
+; GFX1030-NEXT: ; implicit-def: $vgpr23
+; GFX1030-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14
; GFX1030-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1030-NEXT: s_cbranch_execnz BB9_1
; GFX1030-NEXT: ; %bb.2:
@@ -373,36 +455,43 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
; GFX1013-LABEL: image_bvh64_intersect_ray_a16_vgpr_descr:
; GFX1013: ; %bb.0:
; GFX1013-NEXT: s_mov_b32 s0, 0xffff
+; GFX1013-NEXT: v_mov_b32_e32 v16, v11
; GFX1013-NEXT: v_lshrrev_b32_e32 v6, 16, v7
-; GFX1013-NEXT: v_and_b32_e32 v15, s0, v9
+; GFX1013-NEXT: v_and_b32_e32 v11, s0, v9
; GFX1013-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; GFX1013-NEXT: v_and_b32_e32 v10, s0, v10
-; GFX1013-NEXT: s_mov_b32 s1, exec_lo
+; GFX1013-NEXT: v_mov_b32_e32 v17, v12
; GFX1013-NEXT: v_lshlrev_b32_e32 v6, 16, v6
-; GFX1013-NEXT: v_lshlrev_b32_e32 v15, 16, v15
+; GFX1013-NEXT: v_lshlrev_b32_e32 v11, 16, v11
+; GFX1013-NEXT: v_mov_b32_e32 v18, v13
+; GFX1013-NEXT: v_mov_b32_e32 v19, v14
+; GFX1013-NEXT: s_mov_b32 s1, exec_lo
; GFX1013-NEXT: v_and_or_b32 v6, v7, s0, v6
-; GFX1013-NEXT: v_and_or_b32 v7, v8, s0, v15
+; GFX1013-NEXT: v_and_or_b32 v7, v8, s0, v11
; GFX1013-NEXT: v_lshl_or_b32 v8, v10, 16, v9
; GFX1013-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
-; GFX1013-NEXT: v_readfirstlane_b32 s4, v11
-; GFX1013-NEXT: v_readfirstlane_b32 s5, v12
-; GFX1013-NEXT: v_readfirstlane_b32 s6, v13
-; GFX1013-NEXT: v_readfirstlane_b32 s7, v14
-; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
-; GFX1013-NEXT: s_waitcnt vmcnt(0)
-; GFX1013-NEXT: image_bvh64_intersect_ray v[15:18], v[0:15], s[4:7] a16
-; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
+; GFX1013-NEXT: v_readfirstlane_b32 s4, v16
+; GFX1013-NEXT: v_readfirstlane_b32 s5, v17
+; GFX1013-NEXT: v_readfirstlane_b32 s6, v18
+; GFX1013-NEXT: v_readfirstlane_b32 s7, v19
+; GFX1013-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[16:17]
+; GFX1013-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[18:19]
; GFX1013-NEXT: s_and_b32 s0, s0, vcc_lo
; GFX1013-NEXT: s_and_saveexec_b32 s0, s0
+; GFX1013-NEXT: image_bvh64_intersect_ray v[20:23], v[0:15], s[4:7] a16
+; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17
+; GFX1013-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
+; GFX1013-NEXT: ; implicit-def: $vgpr16_vgpr17_vgpr18_vgpr19
+; GFX1013-NEXT: s_waitcnt_depctr 0xffe3
; GFX1013-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX1013-NEXT: s_cbranch_execnz BB9_1
; GFX1013-NEXT: ; %bb.2:
; GFX1013-NEXT: s_mov_b32 exec_lo, s1
; GFX1013-NEXT: s_waitcnt vmcnt(0)
-; GFX1013-NEXT: v_mov_b32_e32 v0, v15
-; GFX1013-NEXT: v_mov_b32_e32 v1, v16
-; GFX1013-NEXT: v_mov_b32_e32 v2, v17
-; GFX1013-NEXT: v_mov_b32_e32 v3, v18
+; GFX1013-NEXT: v_mov_b32_e32 v0, v20
+; GFX1013-NEXT: v_mov_b32_e32 v1, v21
+; GFX1013-NEXT: v_mov_b32_e32 v2, v22
+; GFX1013-NEXT: v_mov_b32_e32 v3, v23
; GFX1013-NEXT: ; return to shader part epilog
%v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
%r = bitcast <4 x i32> %v to <4 x float>
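
(The intersect_ray assembly diffs above show the corrected waterfall-loop shape: the s_and_saveexec that restricts exec to the lanes holding the readfirstlane'd descriptor now precedes the image_bvh*_intersect_ray instruction instead of following it, so the VMEM op only runs for lanes whose descriptor matches, with implicit-defs marking the registers the loop clobbers. Sketch of one loop iteration, condensed from the checks above; labels and register numbers are illustrative:

  BBn_1:                                      ; waterfall loop header
    v_readfirstlane_b32 s4, v14               ; take one lane's descriptor word
    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[14:15] ; lanes sharing that descriptor
    s_and_b32 s0, s0, vcc_lo
    s_and_saveexec_b32 s0, s0                 ; mask exec BEFORE the memory op
    image_bvh_intersect_ray v[0:3], v[18:33], s[4:7]
    s_xor_b32 exec_lo, exec_lo, s0            ; retire the handled lanes
    s_cbranch_execnz BBn_1                    ; loop while lanes remain
)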
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
index 139d82ae5be33..ab1d762e79f4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
@@ -121,10 +121,10 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -170,10 +170,10 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
index 7d06738f80394..d1621a4221d3e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
@@ -83,12 +83,12 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -136,12 +136,12 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
index 8cf0f0f3b93fe..9b9140b448185 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -159,10 +159,10 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp
; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX908: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX908: bb.3:
; GFX908: successors: %bb.4(0x80000000)
; GFX908: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -200,10 +200,10 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX90A: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX90A: bb.3:
; GFX90A: successors: %bb.4(0x80000000)
; GFX90A: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -245,10 +245,10 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v
; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX908: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX908: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX908: bb.3:
; GFX908: successors: %bb.4(0x80000000)
; GFX908: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -284,10 +284,10 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX90A: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX90A: bb.3:
; GFX90A: successors: %bb.4(0x80000000)
; GFX90A: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
index 8a9079b660639..d77e2dad0f6c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
@@ -169,10 +169,10 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -209,10 +209,10 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
index 8169bb5a6df9e..d93dbdcc515cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
@@ -121,10 +121,10 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
index 1ac2425ebab73..d7a30e49ab3cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -69,10 +69,10 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -114,10 +114,10 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -444,10 +444,10 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -486,10 +486,10 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -744,10 +744,10 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -774,7 +774,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
- ; CHECK: %14:vgpr_32, dead %35:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
+ ; CHECK: %14:vgpr_32, dead %33:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
@@ -790,10 +790,10 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %14, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
index 311b3233c81ff..531afc95764e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
@@ -173,10 +173,10 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -211,10 +211,10 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -448,10 +448,10 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -489,10 +489,10 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
index f659d728871e2..d5280c489f5f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
@@ -132,10 +132,10 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -288,10 +288,10 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
index 94e8d8f2e4a1f..70b2e356bc6e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
@@ -71,10 +71,10 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -103,10 +103,10 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -148,10 +148,10 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -474,10 +474,10 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -711,10 +711,10 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %14, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -755,10 +755,10 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
index 3178b2c4e7b4c..4f2fa7e4d3416 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
@@ -167,10 +167,10 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -207,10 +207,10 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
index 87e23a203c2de..c018292215cbb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
@@ -119,10 +119,10 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
index c61c0476ca036..9dbec74b5265b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
@@ -143,10 +143,10 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -179,10 +179,10 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -224,10 +224,10 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -263,10 +263,10 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -309,10 +309,10 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -349,10 +349,10 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
index 9284e290fa1ce..426d05c166f8d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
@@ -62,10 +62,10 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -98,10 +98,10 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -143,10 +143,10 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -182,10 +182,10 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -228,10 +228,10 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -268,10 +268,10 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 4)
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
index 92963d8f36859..61607909c34d6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
@@ -135,10 +135,10 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
@@ -180,10 +180,10 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
@@ -226,10 +226,10 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
@@ -560,10 +560,10 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
@@ -606,10 +606,10 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec_lo = S_MOV_B32_term [[S_MOV_B32_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 5dcc3cf994aa0..36ae4e61ab3c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -2617,10 +2617,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -2652,10 +2652,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -2687,10 +2687,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -2726,10 +2726,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -2759,10 +2759,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -2792,10 +2792,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -2836,10 +2836,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -2873,10 +2873,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -2910,10 +2910,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -2950,10 +2950,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -2983,10 +2983,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3016,10 +3016,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3057,10 +3057,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -3092,10 +3092,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3125,10 +3125,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
- ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096)
; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096)
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3165,11 +3165,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -3215,11 +3215,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3265,11 +3265,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3327,11 +3327,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -3381,11 +3381,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3435,11 +3435,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3495,11 +3495,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -3549,11 +3549,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3603,11 +3603,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3660,11 +3660,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -3711,11 +3711,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3762,11 +3762,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3819,11 +3819,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -3870,11 +3870,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -3921,11 +3921,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -3978,11 +3978,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -4029,11 +4029,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -4080,11 +4080,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
@@ -4136,11 +4136,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX6: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX6: bb.3:
; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX6: bb.4:
@@ -4186,11 +4186,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX7: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX7: bb.3:
; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX7: bb.4:
@@ -4236,11 +4236,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec
; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
+ ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX8: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX8: bb.3:
; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GFX8: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
index 7e28d4f26b835..4c0e1a8073191 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
@@ -131,11 +131,11 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -183,11 +183,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
index 603581a770c7d..1d342604fdcb6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
@@ -90,13 +90,13 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -146,13 +146,13 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
index 40438da4f5e05..fcb8cd58762aa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -173,11 +173,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
- ; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX908: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX908: bb.3:
; GFX908: successors: %bb.4(0x80000000)
; GFX908: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -217,11 +217,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
- ; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX90A: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX90A: bb.3:
; GFX90A: successors: %bb.4(0x80000000)
; GFX90A: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -265,10 +265,10 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX908: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX908: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX908: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX908: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX908: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX908: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX908: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX908: bb.3:
; GFX908: successors: %bb.4(0x80000000)
; GFX908: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -306,10 +306,10 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4)
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GFX90A: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GFX90A: bb.3:
; GFX90A: successors: %bb.4(0x80000000)
; GFX90A: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
index ccc2d779fd2ea..686405ff76851 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
@@ -182,11 +182,11 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4)
- ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -246,11 +246,11 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4)
- ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
index a883b425552c5..3d9e4efb1bbe2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
@@ -131,11 +131,11 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
index 531ce25274577..90bd090363dc4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -199,11 +199,11 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
index 56b79c875bfcd..27321ce7c1e02 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
@@ -161,11 +161,11 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
- ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -205,11 +205,11 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 4)
- ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
index 07dd7b360cf88..888e6aa298881 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
@@ -125,11 +125,11 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
index 2b92cb1350d6a..06c83485116a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
@@ -131,11 +131,11 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
index 25814330a9f78..16c884a4db38a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
@@ -219,11 +219,11 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4)
- ; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; PACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -265,11 +265,11 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 4)
- ; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; UNPACKED: SI_WATERFALL_LOOP %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
index d9d4f8127f076..b4162b9bf16bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
@@ -151,11 +151,11 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
index 24ca3f7c0ad8f..dbcdb83753290 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-s-buffer-load.mir
@@ -74,10 +74,10 @@ body: |
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK: .2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -119,10 +119,10 @@ body: |
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK: .2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
index d0f1dd0e30d07..5ad6c175258cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
@@ -107,14 +107,12 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; FAST: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; FAST: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; FAST: bb.2:
; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %19, %bb.2
- ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -135,10 +133,10 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST: bb.3:
; FAST: successors: %bb.4(0x80000000)
; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -161,14 +159,12 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %19, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -189,10 +185,10 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: successors: %bb.4(0x80000000)
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -223,14 +219,12 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
; FAST: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; FAST: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
- ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; FAST: bb.2:
; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %20, %bb.2
- ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2
; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -251,10 +245,10 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST: bb.3:
; FAST: successors: %bb.4(0x80000000)
; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -278,14 +272,12 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
; GREEDY: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
- ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %20, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -306,10 +298,10 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: successors: %bb.4(0x80000000)
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
index a1973a9e01d6c..182b22e02499f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
@@ -128,14 +128,12 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; FAST: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; FAST: bb.2:
; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.2
; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -156,10 +154,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; FAST: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; FAST: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST: bb.3:
; FAST: successors: %bb.4(0x80000000)
; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -186,14 +184,12 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -214,10 +210,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; GREEDY: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; GREEDY: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: successors: %bb.4(0x80000000)
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -251,14 +247,12 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; FAST: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; FAST: bb.2:
; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.2
; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -269,10 +263,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; FAST: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; FAST: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; FAST: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST: bb.3:
; FAST: successors: %bb.4(0x80000000)
; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -299,14 +293,12 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -317,10 +309,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: successors: %bb.4(0x80000000)
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -354,15 +346,13 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; FAST: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; FAST: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
; FAST: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; FAST: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; FAST: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; FAST: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; FAST: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; FAST: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; FAST: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; FAST: bb.2:
; FAST: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; FAST: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; FAST: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.2
; FAST: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; FAST: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; FAST: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -394,10 +384,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; FAST: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
; FAST: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
; FAST: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
- ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; FAST: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; FAST: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; FAST: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; FAST: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST: bb.3:
; FAST: successors: %bb.4(0x80000000)
; FAST: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -424,15 +414,13 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; GREEDY: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF2]], %bb.1, %24, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %22, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -464,10 +452,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; GREEDY: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
; GREEDY: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
- ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: successors: %bb.4(0x80000000)
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
index e111a99ae4504..c3191c9a7e899 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
@@ -60,14 +60,12 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -78,10 +76,10 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -107,19 +105,17 @@ define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -145,14 +141,12 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -166,10 +160,10 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY6]](s32), [[COPY4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
index fc0df27bfa4ba..925c7bd4a17e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
@@ -1531,14 +1531,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3, %bb.2
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %8(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1549,10 +1547,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -1570,14 +1568,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3, %bb.2
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %8(s32), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1588,10 +1584,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -1616,14 +1612,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3, %bb.2
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1634,10 +1628,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -1656,14 +1650,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]]
; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3, %bb.2
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %18, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1674,10 +1666,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -1704,14 +1696,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3, %bb.2
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1722,10 +1712,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -1745,14 +1735,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32)
; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3, %bb.2
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %19, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1763,10 +1751,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -1791,14 +1779,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3, %bb.2
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1809,10 +1795,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -1830,14 +1816,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3, %bb.2
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1848,10 +1832,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc)
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1)
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -1875,14 +1859,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3, %bb.2
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1893,10 +1875,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -1914,14 +1896,12 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
- ; GREEDY: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; GREEDY: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; GREEDY: bb.2:
; GREEDY: successors: %bb.3, %bb.2
- ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %17, %bb.2
- ; GREEDY: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %7(s32), %bb.2
+ ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2
; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -1932,10 +1912,10 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc)
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32))
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -1977,11 +1957,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2025,11 +2005,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -2082,11 +2062,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2131,11 +2111,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -2186,11 +2166,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2235,11 +2215,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -2289,11 +2269,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2337,11 +2317,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -2391,11 +2371,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2439,11 +2419,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -2493,11 +2473,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2541,11 +2521,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
@@ -2594,11 +2574,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
@@ -2641,11 +2621,11 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4
; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
+ ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4)
- ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; GREEDY: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY: bb.3:
; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; GREEDY: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
index 24a1773cef8f2..82feec76068a5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
@@ -58,14 +58,12 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %16, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -76,10 +74,10 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[COPY6]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -104,19 +102,17 @@ define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgp
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %16, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -141,14 +137,12 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.1, %16, %bb.2
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -162,10 +156,10 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable load (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
index 108cee68d768e..ce5617efb5c85 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
@@ -76,10 +76,10 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
- ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[COPY7]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -111,10 +111,10 @@ define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
- ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -158,10 +158,10 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
- ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: G_AMDGPU_BUFFER_STORE [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]], [[V_READFIRSTLANE_B32_4]], 0, 0, -1 :: (dereferenceable store (s32), align 1, addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
index 25c0d24a65a99..b176820968839 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-waterfall-agpr.mir
@@ -30,10 +30,10 @@ body: |
; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
- ; CHECK: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: G_AMDGPU_BUFFER_STORE %val(s32), %rsrc(<4 x s32>), [[COPY]](s32), %voffset, [[V_READFIRSTLANE_B32_]], 0, 0, 0 :: (dereferenceable store (s32), addrspace 4)
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK: .2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
@@ -62,15 +62,13 @@ body: |
; CHECK: liveins: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:agpr(<8 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[DEF:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<8 x s32>) = COPY [[COPY]](<8 x s32>)
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY2]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: .1:
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
- ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF1]], %bb.0, %8, %bb.1
- ; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF]](<4 x s32>), %bb.0, %2(<4 x s32>), %bb.1
+ ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %6, %bb.1
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
@@ -91,10 +89,10 @@ body: |
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
- ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY1]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>))
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
+ ; CHECK: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY1]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>))
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
- ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; CHECK: SI_WATERFALL_LOOP %bb.1, implicit $exec
; CHECK: .2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
index fbe0c9fd1dd44..7749499153492 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4
@gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4
@@ -97,6 +98,99 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL: .amd_kernel_code_t
+; GISEL-NEXT: amd_code_version_major = 1
+; GISEL-NEXT: amd_code_version_minor = 2
+; GISEL-NEXT: amd_machine_kind = 1
+; GISEL-NEXT: amd_machine_version_major = 7
+; GISEL-NEXT: amd_machine_version_minor = 0
+; GISEL-NEXT: amd_machine_version_stepping = 0
+; GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; GISEL-NEXT: granulated_workitem_vgpr_count = 7
+; GISEL-NEXT: granulated_wavefront_sgpr_count = 4
+; GISEL-NEXT: priority = 0
+; GISEL-NEXT: float_mode = 240
+; GISEL-NEXT: priv = 0
+; GISEL-NEXT: enable_dx10_clamp = 1
+; GISEL-NEXT: debug_mode = 0
+; GISEL-NEXT: enable_ieee_mode = 1
+; GISEL-NEXT: enable_wgp_mode = 0
+; GISEL-NEXT: enable_mem_ordered = 0
+; GISEL-NEXT: enable_fwd_progress = 0
+; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL-NEXT: user_sgpr_count = 14
+; GISEL-NEXT: enable_trap_handler = 0
+; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; GISEL-NEXT: enable_vgpr_workitem_id = 2
+; GISEL-NEXT: enable_exception_msb = 0
+; GISEL-NEXT: granulated_lds_size = 0
+; GISEL-NEXT: enable_exception = 0
+; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
+; GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; GISEL-NEXT: enable_wavefront_size32 = 0
+; GISEL-NEXT: enable_ordered_append_gds = 0
+; GISEL-NEXT: private_element_size = 1
+; GISEL-NEXT: is_ptr64 = 1
+; GISEL-NEXT: is_dynamic_callstack = 1
+; GISEL-NEXT: is_debug_enabled = 0
+; GISEL-NEXT: is_xnack_enabled = 0
+; GISEL-NEXT: workitem_private_segment_byte_size = 16384
+; GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; GISEL-NEXT: gds_segment_byte_size = 0
+; GISEL-NEXT: kernarg_segment_byte_size = 4
+; GISEL-NEXT: workgroup_fbarrier_count = 0
+; GISEL-NEXT: wavefront_sgpr_count = 37
+; GISEL-NEXT: workitem_vgpr_count = 32
+; GISEL-NEXT: reserved_vgpr_first = 0
+; GISEL-NEXT: reserved_vgpr_count = 0
+; GISEL-NEXT: reserved_sgpr_first = 0
+; GISEL-NEXT: reserved_sgpr_count = 0
+; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; GISEL-NEXT: kernarg_segment_alignment = 4
+; GISEL-NEXT: group_segment_alignment = 4
+; GISEL-NEXT: private_segment_alignment = 4
+; GISEL-NEXT: wavefront_size = 6
+; GISEL-NEXT: call_convention = -1
+; GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; GISEL-NEXT: .end_amd_kernel_code_t
+; GISEL-NEXT: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: s_getpc_b64 s[14:15]
+; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL-NEXT: s_add_u32 s8, s8, 8
+; GISEL-NEXT: s_addc_u32 s9, s9, 0
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: s_endpgm
%fptr = load void()*, void()* addrspace(4)* @gv.fptr0
call void %fptr()
ret void
@@ -196,6 +290,100 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19]
; GCN-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL: .amd_kernel_code_t
+; GISEL-NEXT: amd_code_version_major = 1
+; GISEL-NEXT: amd_code_version_minor = 2
+; GISEL-NEXT: amd_machine_kind = 1
+; GISEL-NEXT: amd_machine_version_major = 7
+; GISEL-NEXT: amd_machine_version_minor = 0
+; GISEL-NEXT: amd_machine_version_stepping = 0
+; GISEL-NEXT: kernel_code_entry_byte_offset = 256
+; GISEL-NEXT: kernel_code_prefetch_byte_size = 0
+; GISEL-NEXT: granulated_workitem_vgpr_count = 7
+; GISEL-NEXT: granulated_wavefront_sgpr_count = 4
+; GISEL-NEXT: priority = 0
+; GISEL-NEXT: float_mode = 240
+; GISEL-NEXT: priv = 0
+; GISEL-NEXT: enable_dx10_clamp = 1
+; GISEL-NEXT: debug_mode = 0
+; GISEL-NEXT: enable_ieee_mode = 1
+; GISEL-NEXT: enable_wgp_mode = 0
+; GISEL-NEXT: enable_mem_ordered = 0
+; GISEL-NEXT: enable_fwd_progress = 0
+; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL-NEXT: user_sgpr_count = 14
+; GISEL-NEXT: enable_trap_handler = 0
+; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1
+; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1
+; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1
+; GISEL-NEXT: enable_sgpr_workgroup_info = 0
+; GISEL-NEXT: enable_vgpr_workitem_id = 2
+; GISEL-NEXT: enable_exception_msb = 0
+; GISEL-NEXT: granulated_lds_size = 0
+; GISEL-NEXT: enable_exception = 0
+; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1
+; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1
+; GISEL-NEXT: enable_sgpr_queue_ptr = 1
+; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1
+; GISEL-NEXT: enable_sgpr_dispatch_id = 1
+; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1
+; GISEL-NEXT: enable_sgpr_private_segment_size = 0
+; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0
+; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0
+; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0
+; GISEL-NEXT: enable_wavefront_size32 = 0
+; GISEL-NEXT: enable_ordered_append_gds = 0
+; GISEL-NEXT: private_element_size = 1
+; GISEL-NEXT: is_ptr64 = 1
+; GISEL-NEXT: is_dynamic_callstack = 1
+; GISEL-NEXT: is_debug_enabled = 0
+; GISEL-NEXT: is_xnack_enabled = 0
+; GISEL-NEXT: workitem_private_segment_byte_size = 16384
+; GISEL-NEXT: workgroup_group_segment_byte_size = 0
+; GISEL-NEXT: gds_segment_byte_size = 0
+; GISEL-NEXT: kernarg_segment_byte_size = 4
+; GISEL-NEXT: workgroup_fbarrier_count = 0
+; GISEL-NEXT: wavefront_sgpr_count = 37
+; GISEL-NEXT: workitem_vgpr_count = 32
+; GISEL-NEXT: reserved_vgpr_first = 0
+; GISEL-NEXT: reserved_vgpr_count = 0
+; GISEL-NEXT: reserved_sgpr_first = 0
+; GISEL-NEXT: reserved_sgpr_count = 0
+; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0
+; GISEL-NEXT: kernarg_segment_alignment = 4
+; GISEL-NEXT: group_segment_alignment = 4
+; GISEL-NEXT: private_segment_alignment = 4
+; GISEL-NEXT: wavefront_size = 6
+; GISEL-NEXT: call_convention = -1
+; GISEL-NEXT: runtime_loader_kernel_symbol = 0
+; GISEL-NEXT: .end_amd_kernel_code_t
+; GISEL-NEXT: ; %bb.0:
+; GISEL-NEXT: s_mov_b32 s32, 0
+; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13
+; GISEL-NEXT: s_add_i32 s12, s12, s17
+; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL-NEXT: s_add_u32 s0, s0, s17
+; GISEL-NEXT: s_addc_u32 s1, s1, 0
+; GISEL-NEXT: s_mov_b32 s13, s15
+; GISEL-NEXT: s_mov_b32 s12, s14
+; GISEL-NEXT: s_getpc_b64 s[14:15]
+; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1
+; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL-NEXT: s_add_u32 s8, s8, 8
+; GISEL-NEXT: v_or_b32_e32 v0, v0, v1
+; GISEL-NEXT: s_addc_u32 s9, s9, 0
+; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2
+; GISEL-NEXT: v_or_b32_e32 v31, v0, v1
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT: s_mov_b32 s14, s16
+; GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT: s_endpgm
%fptr = load void(i32)*, void(i32)* addrspace(4)* @gv.fptr1
call void %fptr(i32 123)
ret void
@@ -279,6 +467,84 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[16:17]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 17
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s38, 4
+; GISEL-NEXT: v_writelane_b32 v40, s39, 5
+; GISEL-NEXT: v_writelane_b32 v40, s40, 6
+; GISEL-NEXT: v_writelane_b32 v40, s41, 7
+; GISEL-NEXT: v_writelane_b32 v40, s42, 8
+; GISEL-NEXT: v_writelane_b32 v40, s43, 9
+; GISEL-NEXT: v_writelane_b32 v40, s44, 10
+; GISEL-NEXT: v_writelane_b32 v40, s46, 11
+; GISEL-NEXT: v_writelane_b32 v40, s47, 12
+; GISEL-NEXT: v_writelane_b32 v40, s48, 13
+; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: s_mov_b32 s42, s14
+; GISEL-NEXT: s_mov_b32 s43, s13
+; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
+; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s30, 15
+; GISEL-NEXT: v_writelane_b32 v40, s31, 16
+; GISEL-NEXT: s_mov_b64 s[46:47], exec
+; GISEL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s17, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
+; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
+; GISEL-NEXT: s_mov_b32 s12, s44
+; GISEL-NEXT: s_mov_b32 s13, s43
+; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: ; implicit-def: $vgpr31
+; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GISEL-NEXT: s_cbranch_execnz BB2_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[46:47]
+; GISEL-NEXT: v_readlane_b32 s4, v40, 15
+; GISEL-NEXT: v_readlane_b32 s5, v40, 16
+; GISEL-NEXT: v_readlane_b32 s49, v40, 14
+; GISEL-NEXT: v_readlane_b32 s48, v40, 13
+; GISEL-NEXT: v_readlane_b32 s47, v40, 12
+; GISEL-NEXT: v_readlane_b32 s46, v40, 11
+; GISEL-NEXT: v_readlane_b32 s44, v40, 10
+; GISEL-NEXT: v_readlane_b32 s43, v40, 9
+; GISEL-NEXT: v_readlane_b32 s42, v40, 8
+; GISEL-NEXT: v_readlane_b32 s41, v40, 7
+; GISEL-NEXT: v_readlane_b32 s40, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 5
+; GISEL-NEXT: v_readlane_b32 s38, v40, 4
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 17
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
call void %fptr()
ret void
}
@@ -364,6 +630,85 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[16:17]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 17
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s38, 4
+; GISEL-NEXT: v_writelane_b32 v40, s39, 5
+; GISEL-NEXT: v_writelane_b32 v40, s40, 6
+; GISEL-NEXT: v_writelane_b32 v40, s41, 7
+; GISEL-NEXT: v_writelane_b32 v40, s42, 8
+; GISEL-NEXT: v_writelane_b32 v40, s43, 9
+; GISEL-NEXT: v_writelane_b32 v40, s44, 10
+; GISEL-NEXT: v_writelane_b32 v40, s46, 11
+; GISEL-NEXT: v_writelane_b32 v40, s47, 12
+; GISEL-NEXT: v_writelane_b32 v40, s48, 13
+; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: s_mov_b32 s42, s14
+; GISEL-NEXT: s_mov_b32 s43, s13
+; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
+; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s30, 15
+; GISEL-NEXT: v_writelane_b32 v40, s31, 16
+; GISEL-NEXT: s_mov_b64 s[46:47], exec
+; GISEL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s17, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
+; GISEL-NEXT: s_mov_b32 s12, s44
+; GISEL-NEXT: s_mov_b32 s13, s43
+; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: ; implicit-def: $vgpr31
+; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GISEL-NEXT: s_cbranch_execnz BB3_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[46:47]
+; GISEL-NEXT: v_readlane_b32 s4, v40, 15
+; GISEL-NEXT: v_readlane_b32 s5, v40, 16
+; GISEL-NEXT: v_readlane_b32 s49, v40, 14
+; GISEL-NEXT: v_readlane_b32 s48, v40, 13
+; GISEL-NEXT: v_readlane_b32 s47, v40, 12
+; GISEL-NEXT: v_readlane_b32 s46, v40, 11
+; GISEL-NEXT: v_readlane_b32 s44, v40, 10
+; GISEL-NEXT: v_readlane_b32 s43, v40, 9
+; GISEL-NEXT: v_readlane_b32 s42, v40, 8
+; GISEL-NEXT: v_readlane_b32 s41, v40, 7
+; GISEL-NEXT: v_readlane_b32 s40, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 5
+; GISEL-NEXT: v_readlane_b32 s38, v40, 4
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 17
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
call void %fptr(i32 123)
ret void
}
@@ -448,6 +793,86 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[16:17]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 17
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s38, 4
+; GISEL-NEXT: v_writelane_b32 v40, s39, 5
+; GISEL-NEXT: v_writelane_b32 v40, s40, 6
+; GISEL-NEXT: v_writelane_b32 v40, s41, 7
+; GISEL-NEXT: v_writelane_b32 v40, s42, 8
+; GISEL-NEXT: v_writelane_b32 v40, s43, 9
+; GISEL-NEXT: v_writelane_b32 v40, s44, 10
+; GISEL-NEXT: v_writelane_b32 v40, s46, 11
+; GISEL-NEXT: v_writelane_b32 v40, s47, 12
+; GISEL-NEXT: v_writelane_b32 v40, s48, 13
+; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: s_mov_b32 s42, s14
+; GISEL-NEXT: s_mov_b32 s43, s13
+; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
+; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s30, 15
+; GISEL-NEXT: v_writelane_b32 v40, s31, 16
+; GISEL-NEXT: s_mov_b64 s[46:47], exec
+; GISEL-NEXT: BB4_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s17, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc
+; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
+; GISEL-NEXT: s_mov_b32 s12, s44
+; GISEL-NEXT: s_mov_b32 s13, s43
+; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GISEL-NEXT: v_mov_b32_e32 v2, v0
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: ; implicit-def: $vgpr31
+; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
+; GISEL-NEXT: s_cbranch_execnz BB4_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[46:47]
+; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2
+; GISEL-NEXT: v_readlane_b32 s4, v40, 15
+; GISEL-NEXT: v_readlane_b32 s5, v40, 16
+; GISEL-NEXT: v_readlane_b32 s49, v40, 14
+; GISEL-NEXT: v_readlane_b32 s48, v40, 13
+; GISEL-NEXT: v_readlane_b32 s47, v40, 12
+; GISEL-NEXT: v_readlane_b32 s46, v40, 11
+; GISEL-NEXT: v_readlane_b32 s44, v40, 10
+; GISEL-NEXT: v_readlane_b32 s43, v40, 9
+; GISEL-NEXT: v_readlane_b32 s42, v40, 8
+; GISEL-NEXT: v_readlane_b32 s41, v40, 7
+; GISEL-NEXT: v_readlane_b32 s40, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 5
+; GISEL-NEXT: v_readlane_b32 s38, v40, 4
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 17
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
%a = call i32 %fptr()
%b = add i32 %a, 1
ret i32 %b
@@ -542,6 +967,95 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL: ; %bb.0: ; %bb0
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[16:17], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[16:17]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 19
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s38, 4
+; GISEL-NEXT: v_writelane_b32 v40, s39, 5
+; GISEL-NEXT: v_writelane_b32 v40, s40, 6
+; GISEL-NEXT: v_writelane_b32 v40, s41, 7
+; GISEL-NEXT: v_writelane_b32 v40, s42, 8
+; GISEL-NEXT: v_writelane_b32 v40, s43, 9
+; GISEL-NEXT: v_writelane_b32 v40, s44, 10
+; GISEL-NEXT: v_writelane_b32 v40, s46, 11
+; GISEL-NEXT: v_writelane_b32 v40, s47, 12
+; GISEL-NEXT: v_writelane_b32 v40, s48, 13
+; GISEL-NEXT: v_writelane_b32 v40, s49, 14
+; GISEL-NEXT: v_writelane_b32 v40, s50, 15
+; GISEL-NEXT: v_writelane_b32 v40, s51, 16
+; GISEL-NEXT: s_mov_b32 s42, s14
+; GISEL-NEXT: s_mov_b32 s43, s13
+; GISEL-NEXT: s_mov_b32 s44, s12
+; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11]
+; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9]
+; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7]
+; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5]
+; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
+; GISEL-NEXT: s_cbranch_execz BB5_4
+; GISEL-NEXT: ; %bb.1: ; %bb1
+; GISEL-NEXT: v_writelane_b32 v40, s30, 17
+; GISEL-NEXT: v_writelane_b32 v40, s31, 18
+; GISEL-NEXT: s_mov_b64 s[48:49], exec
+; GISEL-NEXT: BB5_2: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s16, v0
+; GISEL-NEXT: v_readfirstlane_b32 s17, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc
+; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41]
+; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39]
+; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37]
+; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35]
+; GISEL-NEXT: s_mov_b32 s12, s44
+; GISEL-NEXT: s_mov_b32 s13, s43
+; GISEL-NEXT: s_mov_b32 s14, s42
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17]
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: ; implicit-def: $vgpr31
+; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
+; GISEL-NEXT: s_cbranch_execnz BB5_2
+; GISEL-NEXT: ; %bb.3:
+; GISEL-NEXT: s_mov_b64 exec, s[48:49]
+; GISEL-NEXT: v_readlane_b32 s30, v40, 17
+; GISEL-NEXT: v_readlane_b32 s31, v40, 18
+; GISEL-NEXT: BB5_4: ; %bb2
+; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
+; GISEL-NEXT: v_readlane_b32 s51, v40, 16
+; GISEL-NEXT: v_readlane_b32 s50, v40, 15
+; GISEL-NEXT: v_readlane_b32 s49, v40, 14
+; GISEL-NEXT: v_readlane_b32 s48, v40, 13
+; GISEL-NEXT: v_readlane_b32 s47, v40, 12
+; GISEL-NEXT: v_readlane_b32 s46, v40, 11
+; GISEL-NEXT: v_readlane_b32 s44, v40, 10
+; GISEL-NEXT: v_readlane_b32 s43, v40, 9
+; GISEL-NEXT: v_readlane_b32 s42, v40, 8
+; GISEL-NEXT: v_readlane_b32 s41, v40, 7
+; GISEL-NEXT: v_readlane_b32 s40, v40, 6
+; GISEL-NEXT: v_readlane_b32 s39, v40, 5
+; GISEL-NEXT: v_readlane_b32 s38, v40, 4
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 19
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
bb0:
br i1 %cond, label %bb1, label %bb2
@@ -595,6 +1109,48 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 6
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s30, 4
+; GISEL-NEXT: v_writelane_b32 v40, s31, 5
+; GISEL-NEXT: s_mov_b64 s[34:35], exec
+; GISEL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s6, v0
+; GISEL-NEXT: v_readfirstlane_b32 s7, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc
+; GISEL-NEXT: s_movk_i32 s4, 0x7b
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37]
+; GISEL-NEXT: s_cbranch_execnz BB6_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[34:35]
+; GISEL-NEXT: v_readlane_b32 s4, v40, 4
+; GISEL-NEXT: v_readlane_b32 s5, v40, 5
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 6
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
call amdgpu_gfx void %fptr(i32 inreg 123)
ret void
}
@@ -645,6 +1201,52 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 6
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_mov_b32_e32 v41, v0
+; GISEL-NEXT: v_writelane_b32 v40, s30, 4
+; GISEL-NEXT: v_writelane_b32 v40, s31, 5
+; GISEL-NEXT: s_mov_b64 s[34:35], exec
+; GISEL-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s4, v1
+; GISEL-NEXT: v_readfirstlane_b32 s5, v2
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[1:2]
+; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc
+; GISEL-NEXT: v_mov_b32_e32 v0, v41
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37]
+; GISEL-NEXT: s_cbranch_execnz BB7_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[34:35]
+; GISEL-NEXT: v_mov_b32_e32 v0, v41
+; GISEL-NEXT: v_readlane_b32 s4, v40, 4
+; GISEL-NEXT: v_readlane_b32 s5, v40, 5
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 6
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
call amdgpu_gfx void %fptr(i32 %i)
ret i32 %i
}
@@ -697,6 +1299,137 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 6
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s30, 4
+; GISEL-NEXT: v_writelane_b32 v40, s31, 5
+; GISEL-NEXT: s_mov_b64 s[34:35], exec
+; GISEL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s4, v1
+; GISEL-NEXT: v_readfirstlane_b32 s5, v2
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[1:2]
+; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: v_mov_b32_e32 v3, v0
+; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
+; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37]
+; GISEL-NEXT: s_cbranch_execnz BB8_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[34:35]
+; GISEL-NEXT: v_mov_b32_e32 v0, v3
+; GISEL-NEXT: v_readlane_b32 s4, v40, 4
+; GISEL-NEXT: v_readlane_b32 s5, v40, 5
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 6
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
%ret = call amdgpu_gfx i32 %fptr(i32 %i)
ret i32 %ret
}
+
+; Calling a function pointer held in a VGPR can never be a tail call.
+define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
+; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[4:5]
+; GCN-NEXT: v_writelane_b32 v40, s33, 6
+; GCN-NEXT: s_mov_b32 s33, s32
+; GCN-NEXT: s_addk_i32 s32, 0x400
+; GCN-NEXT: v_writelane_b32 v40, s34, 0
+; GCN-NEXT: v_writelane_b32 v40, s35, 1
+; GCN-NEXT: v_writelane_b32 v40, s36, 2
+; GCN-NEXT: v_writelane_b32 v40, s37, 3
+; GCN-NEXT: v_writelane_b32 v40, s30, 4
+; GCN-NEXT: v_writelane_b32 v40, s31, 5
+; GCN-NEXT: s_mov_b64 s[34:35], exec
+; GCN-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: v_readfirstlane_b32 s4, v0
+; GCN-NEXT: v_readfirstlane_b32 s5, v1
+; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GCN-NEXT: s_and_saveexec_b64 s[36:37], vcc
+; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GCN-NEXT: s_xor_b64 exec, exec, s[36:37]
+; GCN-NEXT: s_cbranch_execnz BB9_1
+; GCN-NEXT: ; %bb.2:
+; GCN-NEXT: s_mov_b64 exec, s[34:35]
+; GCN-NEXT: v_readlane_b32 s4, v40, 4
+; GCN-NEXT: v_readlane_b32 s5, v40, 5
+; GCN-NEXT: v_readlane_b32 s37, v40, 3
+; GCN-NEXT: v_readlane_b32 s36, v40, 2
+; GCN-NEXT: v_readlane_b32 s35, v40, 1
+; GCN-NEXT: v_readlane_b32 s34, v40, 0
+; GCN-NEXT: s_addk_i32 s32, 0xfc00
+; GCN-NEXT: v_readlane_b32 s33, v40, 6
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[4:5]
+;
+; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; GISEL-NEXT: s_mov_b64 exec, s[4:5]
+; GISEL-NEXT: v_writelane_b32 v40, s33, 6
+; GISEL-NEXT: s_mov_b32 s33, s32
+; GISEL-NEXT: s_addk_i32 s32, 0x400
+; GISEL-NEXT: v_writelane_b32 v40, s34, 0
+; GISEL-NEXT: v_writelane_b32 v40, s35, 1
+; GISEL-NEXT: v_writelane_b32 v40, s36, 2
+; GISEL-NEXT: v_writelane_b32 v40, s37, 3
+; GISEL-NEXT: v_writelane_b32 v40, s30, 4
+; GISEL-NEXT: v_writelane_b32 v40, s31, 5
+; GISEL-NEXT: s_mov_b64 s[34:35], exec
+; GISEL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: v_readfirstlane_b32 s4, v0
+; GISEL-NEXT: v_readfirstlane_b32 s5, v1
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]
+; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc
+; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
+; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37]
+; GISEL-NEXT: s_cbranch_execnz BB9_1
+; GISEL-NEXT: ; %bb.2:
+; GISEL-NEXT: s_mov_b64 exec, s[34:35]
+; GISEL-NEXT: v_readlane_b32 s4, v40, 4
+; GISEL-NEXT: v_readlane_b32 s5, v40, 5
+; GISEL-NEXT: v_readlane_b32 s37, v40, 3
+; GISEL-NEXT: v_readlane_b32 s36, v40, 2
+; GISEL-NEXT: v_readlane_b32 s35, v40, 1
+; GISEL-NEXT: v_readlane_b32 s34, v40, 0
+; GISEL-NEXT: s_addk_i32 s32, 0xfc00
+; GISEL-NEXT: v_readlane_b32 s33, v40, 6
+; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; GISEL-NEXT: s_mov_b64 exec, s[6:7]
+; GISEL-NEXT: s_waitcnt vmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[4:5]
+ tail call amdgpu_gfx void %fptr()
+ ret void
+}
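
For readers skimming the generated checks: every indirect-call test above shares the same waterfall-loop skeleton. The following is a simplified sketch distilled from the GCN output (register numbers and block labels vary per test; the SGPR names here are illustrative):

    s_mov_b64 s[34:35], exec               ; save the full exec mask before the loop
    BBn_1:                                 ; waterfall loop header
    v_readfirstlane_b32 s4, v0             ; read the pointer from the first active lane
    v_readfirstlane_b32 s5, v1
    v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1]   ; set vcc for every lane holding that same pointer
    s_and_saveexec_b64 s[36:37], vcc       ; save exec, then restrict exec to those lanes
    s_swappc_b64 s[30:31], s[4:5]          ; call through the now lane-uniform SGPR pair
    s_xor_b64 exec, exec, s[36:37]         ; remove the handled lanes from the saved mask
    s_cbranch_execnz BBn_1                 ; loop while any unhandled lanes remain
    ; %bb.2:
    s_mov_b64 exec, s[34:35]               ; restore the original exec mask

Each iteration handles the subset of lanes whose function pointer matches the first active lane, so the loop executes once per distinct pointer value in the wave.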