[llvm] 8bde5e5 - Delay outgoing register assignments to last.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 4 12:33:25 PDT 2021
Author: Amara Emerson
Date: 2021-10-04T12:33:20-07:00
New Revision: 8bde5e58c02c14b43904e9c2e08cca1ab20fafe5
URL: https://github.com/llvm/llvm-project/commit/8bde5e58c02c14b43904e9c2e08cca1ab20fafe5
DIFF: https://github.com/llvm/llvm-project/commit/8bde5e58c02c14b43904e9c2e08cca1ab20fafe5.diff
LOG: Delay outgoing register assignments to last.
The delayed stack protector feature, which is currently used for SDAG (and thus
allows tail calls to be generated more often), depends on being able to extract
the tail call into a separate return block. To do this it also has to extract
the vreg->physreg copies that set up the call's arguments, since otherwise
the call instruction ends up using undefined physregs in its new spliced block.
SelectionDAG implementations can do this because they delay emitting register
copies until *after* the stack arguments are set up. GISel, however, just
processes and emits the arguments in IR order, so stack arguments always end up
last, and thus this breaks the code that looks for any register arg copies that
precede the call instruction.
This patch adds a thunk argument to assignValueToReg() and the custom assignment
hooks. For outgoing arguments, register assignments use this out-param to
return a thunk that does the actual generation of the copies. We collect these
thunks until all the outgoing stack assignments have been done and then execute
them, so that the copies (and perhaps some artifacts like G_SEXTs) are placed
after any stores.
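To illustrate the mechanism, here is a small standalone C++ sketch of the
collect-then-replay pattern (hypothetical helper names; only the container name
mirrors the patch, and this is not the actual handleAssignments() code):

  #include <cstdio>
  #include <functional>
  #include <vector>

  int main() {
    // Register assignments are queued as thunks instead of being emitted
    // immediately; stack ("mem loc") assignments are emitted on the spot.
    std::vector<std::function<void()>> DelayedOutgoingRegAssignments;

    auto emitRegCopy = [](const char *Reg, const char *Val) {
      std::printf("%s = COPY %s\n", Reg, Val);
    };
    auto emitStackStore = [](const char *Val) {
      std::printf("G_STORE %s, %%stackslot\n", Val);
    };

    // Arguments arrive in IR order: register args first, stack arg last.
    DelayedOutgoingRegAssignments.emplace_back(
        [&] { emitRegCopy("$x0", "%foo"); });
    DelayedOutgoingRegAssignments.emplace_back(
        [&] { emitRegCopy("$x1", "%bar"); });
    emitStackStore("%val");

    // Replay the queued copies only after every store has been emitted, so
    // they immediately precede the call and the return-block split point
    // can be found at the first copy.
    for (auto &Fn : DelayedOutgoingRegAssignments)
      Fn();
    std::printf("CALL func\n");
    return 0;
  }

Running this prints the desired order: the G_STORE first, then the $x0/$x1
copies, then the call. Emitting the copies eagerly (the old behavior) would
have placed both copies before the store, leaving the store adjacent to the
call instruction.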
Differential Revision: https://reviews.llvm.org/D110610
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/ARM/ARMCallLowering.cpp
llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
llvm/lib/Target/Mips/MipsCallLowering.cpp
llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
llvm/lib/Target/X86/X86CallLowering.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir
llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll
llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index aa3c824c298f3..9c878d4b087ba 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -262,7 +262,7 @@ class CallLowering {
/// handle the appropriate COPY (either to or from) and mark any
/// relevant uses/defines as needed.
virtual void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) = 0;
+ CCValAssign VA) = 0;
/// The specified value has been assigned to a stack
/// location. Load or store it there, with appropriate extension
@@ -282,11 +282,14 @@ class CallLowering {
}
/// Handle custom values, which may be passed into one or more of \p VAs.
+ /// If the handler wants the assignments to be delayed until after
+ /// mem loc assignments, then it sets \p Thunk to the thunk to do the
+ /// assignment.
/// \return The number of \p VAs that have been assigned after the first
/// one, and which should therefore be skipped from further
/// processing.
- virtual unsigned assignCustomValue(ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ virtual unsigned assignCustomValue(ArgInfo &Arg, ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk = nullptr) {
// This is not a pure virtual method because not all targets need to worry
// about custom values.
llvm_unreachable("Custom values not supported");
@@ -318,7 +321,7 @@ class CallLowering {
/// Provides a default implementation for argument handling.
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
};
/// Base class for ValueHandlers used for arguments passed to a function call,
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index f6a0439186576..17094a8e44f89 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -617,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const unsigned NumArgs = Args.size();
+ // Stores thunks for outgoing register assignments. This is used so we delay
+ // generating register copies until mem loc assignments are done. We do this
+ // so that if the target is using the delayed stack protector feature, we can
+ // find the split point of the block accurately. E.g. if we have:
+ // G_STORE %val, %memloc
+ // $x0 = COPY %foo
+ // $x1 = COPY %bar
+ // CALL func
+ // ... then the split point for the block will correctly be at, and including,
+ // the copy to $x0. If instead the G_STORE instruction immediately precedes
+ // the CALL, then we'd prematurely choose the CALL as the split point, thus
+ // generating a split block with a CALL that uses undefined physregs.
+ SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;
+
for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
if (VA.needsCustom()) {
- unsigned NumArgRegs =
- Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ std::function<void()> Thunk;
+ unsigned NumArgRegs = Handler.assignCustomValue(
+ Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk);
+ if (Thunk)
+ DelayedOutgoingRegAssignments.emplace_back(Thunk);
if (!NumArgRegs)
return false;
j += NumArgRegs;
@@ -743,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ });
+ }
}
// Now that all pieces have been assigned, re-pack the register typed values
@@ -757,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
j += NumParts - 1;
}
+ for (auto &Fn : DelayedOutgoingRegAssignments)
+ Fn();
return true;
}
@@ -1157,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5ad2c6e6e7a08..226ecc18a5864 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -156,7 +156,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -281,7 +281,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index ad9a618f5b2c3..9765751260ef9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -60,7 +60,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value
@@ -103,7 +103,7 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
if (VA.getLocVT().getSizeInBits() < 32) {
@@ -203,7 +203,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index aff7ec8d2ed63..81ec4d09a408d 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -45,6 +45,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <functional>
#include <utility>
using namespace llvm;
@@ -109,7 +110,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -130,7 +131,8 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multple regs yet");
CCValAssign VA = VAs[0];
@@ -158,9 +160,15 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
if (!IsLittle)
std::swap(NewRegs[0], NewRegs[1]);
+ if (Thunk) {
+ *Thunk = [=]() {
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+ };
+ return 1;
+ }
assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
-
return 1;
}
@@ -273,7 +281,7 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -298,7 +306,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
unsigned assignCustomValue(ARMCallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multple regs yet");
CCValAssign VA = VAs[0];
diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
index c5931cbfe04ff..9cd959012e6f9 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
+++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
@@ -33,7 +33,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -110,7 +110,7 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void M68kIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
index 9e0d462db677f..47cdefdba100a 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
+++ b/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
@@ -52,7 +52,7 @@ struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 5c2549ee176b8..97062cf619a28 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -92,7 +92,7 @@ class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
@@ -101,7 +101,8 @@ class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler {
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override;
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk = nullptr) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -127,7 +128,7 @@ class CallReturnHandler : public MipsIncomingValueHandler {
void MipsIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -163,7 +164,8 @@ void MipsIncomingValueHandler::assignValueToAddress(Register ValVReg,
/// dependent on other arguments.
unsigned
MipsIncomingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];
@@ -197,7 +199,7 @@ class MipsOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
@@ -206,7 +208,8 @@ class MipsOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override;
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override;
MachineInstrBuilder &MIB;
};
@@ -214,7 +217,7 @@ class MipsOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
void MipsOutgoingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
@@ -253,7 +256,8 @@ void MipsOutgoingValueHandler::assignValueToAddress(Register ValVReg,
unsigned
MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];
@@ -271,6 +275,15 @@ MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
if (!STI.isLittle())
std::swap(Lo, Hi);
+ // If we can return a thunk, just include the register copies. The unmerge can
+ // be emitted earlier.
+ if (Thunk) {
+ *Thunk = [=]() {
+ MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
+ MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
+ };
+ return 2;
+ }
MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
return 2;
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
index 22731bbd0f829..6b16af2932449 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -80,7 +80,7 @@ bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void PPCIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
index b045032bec065..cc2cb7b26e844 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
@@ -46,7 +46,7 @@ class PPCIncomingValueHandler : public CallLowering::IncomingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index c8bffb4d4d371..a14ce82313cb2 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -105,7 +105,7 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -195,7 +195,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
index 1caef7c8e8772..b8b66236d7ea6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll
@@ -17,9 +17,6 @@ define void @test_varargs() {
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: ADJCALLSTACKDOWN 40, 0, implicit-def $sp, implicit $sp
- ; CHECK-NEXT: $w0 = COPY [[C]](s32)
- ; CHECK-NEXT: $d0 = COPY [[C1]](s64)
- ; CHECK-NEXT: $x1 = COPY [[C2]](s64)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
@@ -41,6 +38,9 @@ define void @test_varargs() {
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
; CHECK-NEXT: G_STORE [[C7]](s64), [[PTR_ADD4]](p0) :: (store (s64) into stack + 32, align 1)
+ ; CHECK-NEXT: $w0 = COPY [[C]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[C1]](s64)
+ ; CHECK-NEXT: $x1 = COPY [[C2]](s64)
; CHECK-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
; CHECK-NEXT: ADJCALLSTACKUP 40, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
@@ -66,16 +66,6 @@ define i32 @i8i16caller() nounwind readnone {
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
; CHECK-NEXT: ADJCALLSTACKDOWN 6, 0, implicit-def $sp, implicit $sp
- ; CHECK-NEXT: $x0 = COPY [[C]](s64)
- ; CHECK-NEXT: $x1 = COPY [[C1]](s64)
- ; CHECK-NEXT: $x2 = COPY [[C2]](s64)
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
- ; CHECK-NEXT: $w3 = COPY [[SEXT]](s32)
- ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
- ; CHECK-NEXT: $w4 = COPY [[SEXT1]](s32)
- ; CHECK-NEXT: $x5 = COPY [[C5]](s64)
- ; CHECK-NEXT: $x6 = COPY [[C6]](s64)
- ; CHECK-NEXT: $x7 = COPY [[C7]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
@@ -89,6 +79,16 @@ define i32 @i8i16caller() nounwind readnone {
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
; CHECK-NEXT: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 5)
+ ; CHECK-NEXT: $x0 = COPY [[C]](s64)
+ ; CHECK-NEXT: $x1 = COPY [[C1]](s64)
+ ; CHECK-NEXT: $x2 = COPY [[C2]](s64)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
+ ; CHECK-NEXT: $w3 = COPY [[SEXT]](s32)
+ ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
+ ; CHECK-NEXT: $w4 = COPY [[SEXT1]](s32)
+ ; CHECK-NEXT: $x5 = COPY [[C5]](s64)
+ ; CHECK-NEXT: $x6 = COPY [[C6]](s64)
+ ; CHECK-NEXT: $x7 = COPY [[C7]](s64)
; CHECK-NEXT: BL @i8i16callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: ADJCALLSTACKUP 6, 0, implicit-def $sp, implicit $sp
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
index dbe1e36b2a3fb..dc9370cf17ac2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll
@@ -86,12 +86,12 @@ define void @test_varargs() {
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16)
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: $d0 = COPY [[C1]](s64)
; CHECK-NEXT: $x1 = COPY [[C2]](s64)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C3]](s8)
; CHECK-NEXT: $w2 = COPY [[ANYEXT]](s32)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[C4]](s16)
; CHECK-NEXT: $w3 = COPY [[ANYEXT1]](s32)
; CHECK-NEXT: $w4 = COPY [[C5]](s32)
; CHECK-NEXT: $s1 = COPY [[C6]](s32)
@@ -114,6 +114,10 @@ define void @test_stack_ext_needed() {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 42
; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
; CHECK-NEXT: $x0 = COPY [[DEF]](s64)
; CHECK-NEXT: $x1 = COPY [[DEF]](s64)
; CHECK-NEXT: $x2 = COPY [[DEF]](s64)
@@ -122,10 +126,6 @@ define void @test_stack_ext_needed() {
; CHECK-NEXT: $x5 = COPY [[DEF]](s64)
; CHECK-NEXT: $x6 = COPY [[DEF]](s64)
; CHECK-NEXT: $x7 = COPY [[DEF]](s64)
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; CHECK-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
; CHECK-NEXT: BL @stack_ext_needed, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7
; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
@@ -162,9 +162,9 @@ define void @caller_s128(i128 *%ptr) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128) from %ir.ptr)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
; CHECK-NEXT: $x0 = COPY [[UV]](s64)
; CHECK-NEXT: $x1 = COPY [[UV1]](s64)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
; CHECK-NEXT: $x2 = COPY [[UV2]](s64)
; CHECK-NEXT: $x3 = COPY [[UV3]](s64)
; CHECK-NEXT: $x4 = COPY [[COPY]](p0)
@@ -195,16 +195,6 @@ define i32 @i8i16caller() nounwind readnone {
; CHECK-NEXT: [[C10:%[0-9]+]]:_(s8) = G_CONSTANT i8 99
; CHECK-NEXT: [[C11:%[0-9]+]]:_(s8) = G_CONSTANT i8 100
; CHECK-NEXT: ADJCALLSTACKDOWN 32, 0, implicit-def $sp, implicit $sp
- ; CHECK-NEXT: $x0 = COPY [[C]](s64)
- ; CHECK-NEXT: $x1 = COPY [[C1]](s64)
- ; CHECK-NEXT: $x2 = COPY [[C2]](s64)
- ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
- ; CHECK-NEXT: $w3 = COPY [[SEXT]](s32)
- ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
- ; CHECK-NEXT: $w4 = COPY [[SEXT1]](s32)
- ; CHECK-NEXT: $x5 = COPY [[C5]](s64)
- ; CHECK-NEXT: $x6 = COPY [[C6]](s64)
- ; CHECK-NEXT: $x7 = COPY [[C7]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s64)
@@ -218,6 +208,16 @@ define i32 @i8i16caller() nounwind readnone {
; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s64)
; CHECK-NEXT: G_STORE [[C11]](s8), [[PTR_ADD3]](p0) :: (store (s8) into stack + 24)
+ ; CHECK-NEXT: $x0 = COPY [[C]](s64)
+ ; CHECK-NEXT: $x1 = COPY [[C1]](s64)
+ ; CHECK-NEXT: $x2 = COPY [[C2]](s64)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C3]](s8)
+ ; CHECK-NEXT: $w3 = COPY [[SEXT]](s32)
+ ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[C4]](s16)
+ ; CHECK-NEXT: $w4 = COPY [[SEXT1]](s32)
+ ; CHECK-NEXT: $x5 = COPY [[C5]](s64)
+ ; CHECK-NEXT: $x6 = COPY [[C6]](s64)
+ ; CHECK-NEXT: $x7 = COPY [[C7]](s64)
; CHECK-NEXT: BL @i8i16callee, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $w3, implicit $w4, implicit $x5, implicit $x6, implicit $x7, implicit-def $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0
; CHECK-NEXT: ADJCALLSTACKUP 32, 0, implicit-def $sp, implicit $sp
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
index 63af1ba433ee9..6360538a28221 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -104,6 +104,13 @@ define i32 @test_too_big_stack() {
; DARWIN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
; DARWIN-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
; DARWIN-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $sp, implicit $sp
+ ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+ ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; DARWIN-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+ ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; DARWIN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; DARWIN-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
; DARWIN-NEXT: $x0 = COPY [[DEF]](s64)
; DARWIN-NEXT: $x1 = COPY [[DEF]](s64)
; DARWIN-NEXT: $x2 = COPY [[DEF]](s64)
@@ -112,13 +119,6 @@ define i32 @test_too_big_stack() {
; DARWIN-NEXT: $x5 = COPY [[DEF]](s64)
; DARWIN-NEXT: $x6 = COPY [[DEF]](s64)
; DARWIN-NEXT: $x7 = COPY [[DEF]](s64)
- ; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
- ; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; DARWIN-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
- ; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; DARWIN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; DARWIN-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 2, align 1)
; DARWIN-NEXT: BL @too_big_stack, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
; DARWIN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; DARWIN-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $sp, implicit $sp
@@ -130,6 +130,13 @@ define i32 @test_too_big_stack() {
; WINDOWS-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 8
; WINDOWS-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9
; WINDOWS-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
+ ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
+ ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; WINDOWS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+ ; WINDOWS-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
+ ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; WINDOWS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+ ; WINDOWS-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
; WINDOWS-NEXT: $x0 = COPY [[DEF]](s64)
; WINDOWS-NEXT: $x1 = COPY [[DEF]](s64)
; WINDOWS-NEXT: $x2 = COPY [[DEF]](s64)
@@ -138,13 +145,6 @@ define i32 @test_too_big_stack() {
; WINDOWS-NEXT: $x5 = COPY [[DEF]](s64)
; WINDOWS-NEXT: $x6 = COPY [[DEF]](s64)
; WINDOWS-NEXT: $x7 = COPY [[DEF]](s64)
- ; WINDOWS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
- ; WINDOWS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; WINDOWS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; WINDOWS-NEXT: G_STORE [[C]](s8), [[PTR_ADD]](p0) :: (store (s8) into stack)
- ; WINDOWS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
- ; WINDOWS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; WINDOWS-NEXT: G_STORE [[C1]](s16), [[PTR_ADD1]](p0) :: (store (s16) into stack + 8, align 1)
; WINDOWS-NEXT: BL @too_big_stack, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit $x4, implicit $x5, implicit $x6, implicit $x7, implicit-def $w0
; WINDOWS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; WINDOWS-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
@@ -206,13 +206,13 @@ define void @test_varargs_2() {
; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
; DARWIN-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
- ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
- ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
- ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
; DARWIN-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
; DARWIN-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; DARWIN-NEXT: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
+ ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
+ ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
+ ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
; DARWIN-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
; DARWIN-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
; DARWIN-NEXT: RET_ReallyLR
@@ -255,13 +255,13 @@ define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
; DARWIN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
; DARWIN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
; DARWIN-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $sp, implicit $sp
- ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
- ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
- ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
; DARWIN-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $sp
; DARWIN-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; DARWIN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
; DARWIN-NEXT: G_STORE [[C3]](s64), [[PTR_ADD]](p0) :: (store (s64) into stack, align 1)
+ ; DARWIN-NEXT: $w0 = COPY [[C]](s32)
+ ; DARWIN-NEXT: $d0 = COPY [[C1]](s64)
+ ; DARWIN-NEXT: $x1 = COPY [[C2]](s64)
; DARWIN-NEXT: BL @varargs, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
; DARWIN-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
; DARWIN-NEXT: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
index 034018b34aab6..d2989a852a032 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
@@ -26,9 +26,9 @@ define i32 @call_use_s128(i32 %p1, i128 %p2, i32 %p3, i32 %p4, i32 %p5, i128 %p6
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
; CHECK-NEXT: $x0 = COPY [[UV]](s64)
; CHECK-NEXT: $x1 = COPY [[UV1]](s64)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
; CHECK-NEXT: $x2 = COPY [[UV2]](s64)
; CHECK-NEXT: $x3 = COPY [[UV3]](s64)
; CHECK-NEXT: BL @use_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
index d314081f07527..ee9a938bd8661 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll
@@ -101,7 +101,6 @@ define void @test_invoke_varargs() personality i8* bitcast (i32 (...)* @__gxx_pe
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: EH_LABEL <mcsymbol >
; CHECK-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; CHECK-NEXT: $x0 = COPY [[C]](p0)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $sp
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
@@ -110,6 +109,7 @@ define void @test_invoke_varargs() personality i8* bitcast (i32 (...)* @__gxx_pe
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
; CHECK-NEXT: G_STORE [[C2]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1)
+ ; CHECK-NEXT: $x0 = COPY [[C]](p0)
; CHECK-NEXT: BL @printf, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0
; CHECK-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: EH_LABEL <mcsymbol >
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
index 8b0de6dfe4271..88d9680a0abf6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-s128-div.mir
@@ -30,9 +30,9 @@ body: |
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
; CHECK-NEXT: $x0 = COPY [[UV]](s64)
; CHECK-NEXT: $x1 = COPY [[UV1]](s64)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
; CHECK-NEXT: $x2 = COPY [[UV2]](s64)
; CHECK-NEXT: $x3 = COPY [[UV3]](s64)
; CHECK-NEXT: BL &__udivti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
@@ -72,9 +72,9 @@ body: |
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128) from %ir.v2ptr)
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](s128)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
; CHECK-NEXT: $x0 = COPY [[UV]](s64)
; CHECK-NEXT: $x1 = COPY [[UV1]](s64)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD1]](s128)
; CHECK-NEXT: $x2 = COPY [[UV2]](s64)
; CHECK-NEXT: $x3 = COPY [[UV3]](s64)
; CHECK-NEXT: BL &__divti3, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3, implicit-def $x0, implicit-def $x1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index 72fbdf7fbb75f..6ce22ac206a0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -237,6 +237,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
; GFX900-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+ ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+ ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -268,10 +272,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX900-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; GFX900-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX900-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -323,6 +323,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
; GFX908-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+ ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+ ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -354,10 +358,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX908-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; GFX908-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX908-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -453,6 +453,10 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+ ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+ ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX900-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -484,10 +488,6 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
- ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
- ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
@@ -579,6 +579,10 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>)
+ ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
+ ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX908-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -610,10 +614,6 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 {
; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32)
; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32)
; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32
- ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32)
- ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>)
; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
index 16c56eaf9a380..375c78ad0b439 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll
@@ -2869,8 +2869,8 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32>
; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1)
+ ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5)
; GCN-NEXT: $vgpr1 = COPY [[UV]](s32)
; GCN-NEXT: $vgpr2 = COPY [[UV1]](s32)
; GCN-NEXT: $vgpr3 = COPY [[LOAD1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
index 56e996f6c0a76..b3b5f4f50331b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -49,12 +49,12 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32)
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index bd7275c361b0e..81d21c03c943b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -2465,6 +2465,10 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
@@ -2496,10 +2500,6 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2558,6 +2558,13 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
@@ -2589,13 +2596,6 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -2652,6 +2652,13 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<66 x s16>)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>)
@@ -2683,13 +2690,6 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -3439,6 +3439,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -3470,10 +3474,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -3535,6 +3535,13 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -3566,13 +3573,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -3636,6 +3636,21 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
+ ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
+ ; CHECK-NEXT: G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -3667,21 +3682,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD2]](s8)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5)
- ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
- ; CHECK-NEXT: G_STORE [[COPY22]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY23]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -3747,6 +3747,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>)
+ ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
+ ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -3778,16 +3788,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 {
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C3]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD2]](p3), [[PTR_ADD2]](p5) :: (store (p3) into stack + 4, addrspace 5)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C5]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD3]](p5), [[PTR_ADD3]](p5) :: (store (p5) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
@@ -4541,6 +4541,17 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C3]](s32)
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>)
+ ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32)
+ ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
+ ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
@@ -4572,17 +4583,6 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C5]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
- ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4)
@@ -4654,64 +4654,64 @@ define void @stack_12xv3i32() #0 {
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
+ ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
+ ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
+ ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
+ ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
+ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
+ ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
+ ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
+ ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+ ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
+ ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
+ ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+ ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
+ ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
+ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
+ ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
- ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
- ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
- ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
- ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
- ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
- ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
- ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
- ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
- ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
- ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
- ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
- ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
- ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
- ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
@@ -4796,64 +4796,64 @@ define void @stack_12xv3f32() #0 {
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
+ ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
+ ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
+ ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
+ ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
+ ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
+ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
+ ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
+ ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
+ ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
+ ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+ ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
+ ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
+ ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
+ ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
+ ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
+ ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
+ ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>)
; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
- ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<3 x s32>)
; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
- ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<3 x s32>)
; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
- ; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<3 x s32>)
; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
- ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<3 x s32>)
; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
- ; CHECK-NEXT: [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<3 x s32>)
; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
- ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>)
; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
- ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>)
; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
- ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>)
; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
- ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
- ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
- ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32)
- ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
- ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>)
- ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32)
- ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
- ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32)
- ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5)
- ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32)
- ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
@@ -4934,43 +4934,12 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
- ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
- ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
- ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
- ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
- ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
- ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
- ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
- ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
- ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
- ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
- ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
- ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
- ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
- ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
- ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
- ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
- ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
- ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
- ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
- ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
- ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
- ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
- ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
@@ -5000,6 +4969,37 @@ define void @stack_8xv5i32() #0 {
; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32)
; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
@@ -5076,43 +5076,12 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>)
- ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
- ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>)
- ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
- ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
- ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
- ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
- ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
; CHECK-NEXT: [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR3]](<5 x s32>)
- ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
- ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
- ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
- ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
- ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>)
- ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
- ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
- ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
- ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
- ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>)
- ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
- ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
- ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
- ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
- ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>)
- ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32
; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32)
@@ -5142,6 +5111,37 @@ define void @stack_8xv5f32() #0 {
; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32)
; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5)
+ ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32)
; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
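[Editorial note: the reordered hunks above all share one shape — the G_STOREs into the outgoing stack slots now appear *before* the $vgpr argument copies instead of after them. Below is a minimal, standalone C++ sketch of one way such a reordering can be produced: defer the register copies as callbacks and run them only once every stack store has been emitted. It assumes nothing about the real CallLowering interfaces; the thunk vector, the register/stack split, and the printf markers are purely illustrative.]

// Hypothetical sketch, not the actual LLVM API: emit stack stores eagerly,
// capture register copies as thunks, and flush the thunks afterwards so all
// copies land contiguously just before the call instruction.
#include <cstdio>
#include <functional>
#include <vector>

int main() {
  std::vector<std::function<void()>> DelayedOutgoingRegAssignments;

  // Walk the outgoing arguments in IR order (4 args assumed for illustration).
  for (int ArgIdx = 0; ArgIdx < 4; ++ArgIdx) {
    bool IsStackArg = (ArgIdx >= 2); // assume the last two spill to the stack
    if (IsStackArg) {
      // Stack assignments are emitted immediately (the G_STOREs above).
      std::printf("G_STORE arg%d into stack\n", ArgIdx);
    } else {
      // Register assignments are only captured here, not emitted yet.
      DelayedOutgoingRegAssignments.push_back([ArgIdx] {
        std::printf("$vgpr%d = COPY arg%d\n", ArgIdx, ArgIdx);
      });
    }
  }

  // All stack stores are placed; now run the deferred copies.
  for (auto &Thunk : DelayedOutgoingRegAssignments)
    Thunk();
}

Run as-is, this prints the two stack stores first and then the $vgpr0/$vgpr1 copies, matching the ordering the updated CHECK lines in the hunks above verify.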
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
index 7732958191914..51a214550efe2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -200,12 +200,12 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3
; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
- ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32)
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>)
; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
@@ -266,10 +266,10 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32
- ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -379,6 +379,12 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x
; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5)
; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32)
@@ -410,12 +416,6 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x
; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
- ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -473,6 +473,12 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32)
@@ -504,12 +510,6 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i
; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
- ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -535,6 +535,16 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
+ ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
+ ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
+ ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
+ ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
+ ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
+ ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[C]](s32)
@@ -566,16 +576,6 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: $vgpr28 = COPY [[C]](s32)
; GCN-NEXT: $vgpr29 = COPY [[C]](s32)
; GCN-NEXT: $vgpr30 = COPY [[C]](s32)
- ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32
- ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32)
- ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5)
- ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32)
- ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5)
- ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32)
- ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5)
; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>)
; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
@@ -672,6 +672,12 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[COPY2]](s32)
@@ -703,12 +709,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3
; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32)
; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32)
; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32)
- ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN-NEXT: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -778,6 +778,12 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32)
; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5)
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32
+ ; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32)
; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[C1]](s32)
@@ -809,12 +815,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: $vgpr28 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr29 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr30 = COPY [[C1]](s32)
- ; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
- ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5)
; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>)
; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3
@@ -964,13 +964,13 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 {
; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
- ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32)
; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5)
; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5)
+ ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32)
; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
@@ -1129,6 +1129,10 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
+ ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; GCN-NEXT: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
; GCN-NEXT: $vgpr0 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr1 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr2 = COPY [[C1]](s32)
@@ -1160,10 +1164,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: $vgpr28 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr29 = COPY [[C1]](s32)
; GCN-NEXT: $vgpr30 = COPY [[C1]](s32)
- ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
- ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5)
- ; GCN-NEXT: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
- ; GCN-NEXT: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store (s32) into %fixed-stack.0, align 16, addrspace 5)
; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir
index 1613f0ff49a61..f721e7f3da128 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-vfp4.mir
@@ -88,21 +88,21 @@ body: |
; HARD: [[R:%[0-9]+]]:_(s64) = G_FMA [[X]], [[X]], [[Y]]
; SOFT-NOT: G_FMA
; SOFT: ADJCALLSTACKDOWN
- ; SOFT-ABI-DAG: $r{{[0-1]}} = COPY [[X0]]
- ; SOFT-ABI-DAG: $r{{[0-1]}} = COPY [[X1]]
- ; SOFT-ABI-DAG: $r{{[2-3]}} = COPY [[X0]]
- ; SOFT-ABI-DAG: $r{{[2-3]}} = COPY [[X1]]
; SOFT-ABI: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; SOFT-ABI: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; SOFT-ABI: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32)
-
- ; FIXME: Should avoid multiple copies from $sp
- ; FIXME: This ought to be align 8 but ARM's call lowering hardcodes it to 1
; SOFT-ABI: G_STORE [[Y0]](s32), [[FI1]](p0){{.*}}store (s32) into stack, align 1)
; SOFT-ABI: [[SP2:%[0-9]+]]:_(p0) = COPY $sp
+ ; FIXME: Should avoid multiple copies from $sp
+ ; FIXME: This ought to be align 8 but ARM's call lowering hardcodes it to 1
; SOFT-ABI: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; SOFT-ABI: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32)
; SOFT-ABI: G_STORE [[Y1]](s32), [[FI2]](p0){{.*}}store (s32) into stack + 4, align 1)
+ ; SOFT-ABI-DAG: $r{{[0-1]}} = COPY [[X0]]
+ ; SOFT-ABI-DAG: $r{{[0-1]}} = COPY [[X1]]
+ ; SOFT-ABI-DAG: $r{{[2-3]}} = COPY [[X0]]
+ ; SOFT-ABI-DAG: $r{{[2-3]}} = COPY [[X1]]
+
; SOFT-ABI: BL &fma, {{.*}}, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; SOFT-ABI-DAG: [[R0:%[0-9]+]]:_(s32) = COPY $r0
; SOFT-ABI-DAG: [[R1:%[0-9]+]]:_(s32) = COPY $r1
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
index d2e73235b0e4c..f8d83ba9a2165 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
@@ -30,10 +30,6 @@ define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) {
; CHECK-DAG: [[AVREG:%[0-9]+]]:_(p0) = COPY $r0
; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY $r1
; CHECK: ADJCALLSTACKDOWN 8, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK-DAG: $r0 = COPY [[BVREG]]
-; CHECK-DAG: $r1 = COPY [[AVREG]]
-; CHECK-DAG: $r2 = COPY [[BVREG]]
-; CHECK-DAG: $r3 = COPY [[AVREG]]
; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32)
@@ -42,6 +38,10 @@ define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) {
; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32)
; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store (p0)
+; CHECK-DAG: $r0 = COPY [[BVREG]]
+; CHECK-DAG: $r1 = COPY [[AVREG]]
+; CHECK-DAG: $r2 = COPY [[BVREG]]
+; CHECK-DAG: $r3 = COPY [[AVREG]]
; ARM: BL @simple_stack_params_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0
; THUMB: tBL 14 /* CC::al */, $noreg, @simple_stack_params_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0
; CHECK: [[RVREG:%[0-9]+]]:_(p0) = COPY $r0
@@ -66,13 +66,9 @@ define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) {
; CHECK-DAG: [[CVREG:%[0-9]+]]:_(s1) = G_TRUNC [[R2VREG]]
; CHECK: ADJCALLSTACKDOWN 20, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
; CHECK: [[SEXTA:%[0-9]+]]:_(s32) = G_SEXT [[AVREG]](s8)
-; CHECK: $r0 = COPY [[SEXTA]]
; CHECK: [[ZEXTA:%[0-9]+]]:_(s32) = G_ZEXT [[AVREG]](s8)
-; CHECK: $r1 = COPY [[ZEXTA]]
; CHECK: [[SEXTB:%[0-9]+]]:_(s32) = G_SEXT [[BVREG]](s16)
-; CHECK: $r2 = COPY [[SEXTB]]
; CHECK: [[ZEXTB:%[0-9]+]]:_(s32) = G_ZEXT [[BVREG]](s16)
-; CHECK: $r3 = COPY [[ZEXTB]]
; CHECK: [[SEXTA2:%[0-9]+]]:_(s32) = G_SEXT [[AVREG]]
; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
@@ -98,6 +94,10 @@ define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) {
; CHECK: [[OFF5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[FI5:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP5]], [[OFF5]](s32)
; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store (s32)
+; CHECK: $r0 = COPY [[SEXTA]]
+; CHECK: $r1 = COPY [[ZEXTA]]
+; CHECK: $r2 = COPY [[SEXTB]]
+; CHECK: $r3 = COPY [[ZEXTB]]
; ARM: BL @ext_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0
; THUMB: tBL 14 /* CC::al */, $noreg, @ext_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0
; CHECK: [[R0VREG:%[0-9]+]]:_(s32) = COPY $r0
@@ -144,12 +144,7 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
; BIG-DAG: [[AVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32)
; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY $r2
; CHECK: ADJCALLSTACKDOWN 16, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK-DAG: $r0 = COPY [[BVREG]]
; CHECK-DAG: [[A1:%[0-9]+]]:_(s32), [[A2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AVREG]](s64)
-; LITTLE-DAG: $r2 = COPY [[A1]]
-; LITTLE-DAG: $r3 = COPY [[A2]]
-; BIG-DAG: $r2 = COPY [[A2]]
-; BIG-DAG: $r3 = COPY [[A1]]
; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32)
@@ -158,6 +153,11 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) {
; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32)
; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store (s64)
+; CHECK-DAG: $r0 = COPY [[BVREG]]
+; LITTLE-DAG: $r2 = COPY [[A1]]
+; LITTLE-DAG: $r3 = COPY [[A2]]
+; BIG-DAG: $r2 = COPY [[A2]]
+; BIG-DAG: $r3 = COPY [[A1]]
; ARM: BL @aapcscc_fp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; THUMB: tBL 14 /* CC::al */, $noreg, @aapcscc_fp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY $r0
@@ -268,10 +268,6 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) {
; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load (s32) from %fixed-stack.[[LAST_STACK_ID]]
; CHECK: ADJCALLSTACKDOWN 64, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK: $r0 = COPY [[R0]]
-; CHECK: $r1 = COPY [[R1]]
-; CHECK: $r2 = COPY [[R2]]
-; CHECK: $r3 = COPY [[R3]]
; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_FIRST_ELEMENT]](s32)
@@ -282,6 +278,10 @@ define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) {
; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_LAST_ELEMENT]](s32)
; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store (s32)
+; CHECK: $r0 = COPY [[R0]]
+; CHECK: $r1 = COPY [[R1]]
+; CHECK: $r2 = COPY [[R2]]
+; CHECK: $r3 = COPY [[R3]]
; ARM: BL @large_int_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3
; THUMB: tBL 14 /* CC::al */, $noreg, @large_int_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3
; CHECK: ADJCALLSTACKUP 64, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
@@ -311,19 +311,19 @@ define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) {
; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load (s64) from %fixed-stack.[[ARR2_ID]]
; CHECK: ADJCALLSTACKDOWN 8, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64)
+; CHECK: [[ARR1_0:%[0-9]+]]:_(s32), [[ARR1_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR1]](s64)
+; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
+; CHECK: [[ARR2_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: [[ARR2_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[ARR2_OFFSET]](s32)
+; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store (s64)
; LITTLE: $r0 = COPY [[ARR0_0]](s32)
; LITTLE: $r1 = COPY [[ARR0_1]](s32)
; BIG: $r0 = COPY [[ARR0_1]](s32)
; BIG: $r1 = COPY [[ARR0_0]](s32)
-; CHECK: [[ARR1_0:%[0-9]+]]:_(s32), [[ARR1_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR1]](s64)
; LITTLE: $r2 = COPY [[ARR1_0]](s32)
; LITTLE: $r3 = COPY [[ARR1_1]](s32)
; BIG: $r2 = COPY [[ARR1_1]](s32)
; BIG: $r3 = COPY [[ARR1_0]](s32)
-; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
-; CHECK: [[ARR2_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; CHECK: [[ARR2_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[ARR2_OFFSET]](s32)
-; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store (s64)
; ARM: BL @fp_arrays_aapcs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; THUMB: tBL 14 /* CC::al */, $noreg, @fp_arrays_aapcs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $r0
@@ -363,12 +363,6 @@ define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3
; CHECK: [[Z3_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]]
; CHECK: [[Z3:%[0-9]+]]:_(s64) = G_LOAD [[Z3_FI]]{{.*}}load (s64)
; CHECK: ADJCALLSTACKDOWN 32, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK: $d0 = COPY [[X0]](s64)
-; CHECK: $d1 = COPY [[X1]](s64)
-; CHECK: $d2 = COPY [[X2]](s64)
-; CHECK: $s6 = COPY [[Y0]](s32)
-; CHECK: $s7 = COPY [[Y1]](s32)
-; CHECK: $s8 = COPY [[Y2]](s32)
; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[Z0_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[Z0_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[Z0_OFFSET]](s32)
@@ -385,6 +379,12 @@ define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3
; CHECK: [[Z3_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK: [[Z3_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[Z3_OFFSET]](s32)
; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store (s64)
+; CHECK: $d0 = COPY [[X0]](s64)
+; CHECK: $d1 = COPY [[X1]](s64)
+; CHECK: $d2 = COPY [[X2]](s64)
+; CHECK: $s6 = COPY [[Y0]](s32)
+; CHECK: $s7 = COPY [[Y1]](s32)
+; CHECK: $s8 = COPY [[Y2]](s32)
; ARM: BL @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $s6, implicit $s7, implicit $s8, implicit-def $s0, implicit-def $s1, implicit-def $s2, implicit-def $s3
; THUMB: tBL 14 /* CC::al */, $noreg, @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $s6, implicit $s7, implicit $s8, implicit-def $s0, implicit-def $s1, implicit-def $s2, implicit-def $s3
; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $s0
@@ -422,10 +422,6 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) {
; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load (s32) from %fixed-stack.[[LAST_STACK_ID]]
; CHECK: ADJCALLSTACKDOWN 80, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK: $r0 = COPY [[R0]]
-; CHECK: $r1 = COPY [[R1]]
-; CHECK: $r2 = COPY [[R2]]
-; CHECK: $r3 = COPY [[R3]]
; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_FIRST_ELEMENT]](s32)
@@ -436,6 +432,10 @@ define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) {
; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 76
; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP]], [[OFF_LAST_ELEMENT]](s32)
; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store (s32)
+; CHECK: $r0 = COPY [[R0]]
+; CHECK: $r1 = COPY [[R1]]
+; CHECK: $r2 = COPY [[R2]]
+; CHECK: $r3 = COPY [[R3]]
; ARM: BL @tough_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; THUMB: tBL 14 /* CC::al */, $noreg, @tough_arrays_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0, implicit-def $r1
; CHECK: [[R0:%[0-9]+]]:_(p0) = COPY $r0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll b/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll
index 104a78506a4d7..6837954faf4df 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll
+++ b/llvm/test/CodeGen/ARM/GlobalISel/irtranslator-varargs-lowering.ll
@@ -8,10 +8,6 @@ define arm_aapcscc i32 @test_call_to_varargs_with_ints(i32 *%a, i32 %b) {
; CHECK-DAG: [[AVREG:%[0-9]+]]:_(p0) = COPY $r0
; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY $r1
; CHECK: ADJCALLSTACKDOWN 8, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK-DAG: $r0 = COPY [[BVREG]]
-; CHECK-DAG: $r1 = COPY [[AVREG]]
-; CHECK-DAG: $r2 = COPY [[BVREG]]
-; CHECK-DAG: $r3 = COPY [[AVREG]]
; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32)
@@ -20,6 +16,10 @@ define arm_aapcscc i32 @test_call_to_varargs_with_ints(i32 *%a, i32 %b) {
; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP2]], [[OFF2]](s32)
; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store (p0)
+; CHECK-DAG: $r0 = COPY [[BVREG]]
+; CHECK-DAG: $r1 = COPY [[AVREG]]
+; CHECK-DAG: $r2 = COPY [[BVREG]]
+; CHECK-DAG: $r3 = COPY [[AVREG]]
; ARM: BL @int_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0
; THUMB: tBL 14 /* CC::al */, $noreg, @int_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r1, implicit $r2, implicit $r3, implicit-def $r0
; CHECK: [[RVREG:%[0-9]+]]:_(s32) = COPY $r0
@@ -39,14 +39,14 @@ define arm_aapcs_vfpcc float @test_call_to_varargs_with_floats(float %a, double
; CHECK-DAG: [[AVREG:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s64) = COPY $d1
; CHECK: ADJCALLSTACKDOWN 8, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK-DAG: $r0 = COPY [[AVREG]]
; CHECK-DAG: [[B1:%[0-9]+]]:_(s32), [[B2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BVREG]](s64)
-; CHECK-DAG: $r2 = COPY [[B1]]
-; CHECK-DAG: $r3 = COPY [[B2]]
; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32)
; CHECK: G_STORE [[BVREG]](s64), [[FI1]](p0){{.*}}store (s64)
+; CHECK-DAG: $r0 = COPY [[AVREG]]
+; CHECK-DAG: $r2 = COPY [[B1]]
+; CHECK-DAG: $r3 = COPY [[B2]]
; ARM: BL @float_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0
; THUMB: tBL 14 /* CC::al */, $noreg, @float_varargs_target, csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0
; CHECK: [[RVREG:%[0-9]+]]:_(s32) = COPY $r0
@@ -86,14 +86,14 @@ define arm_aapcs_vfpcc float @test_indirect_call_to_varargs(float (float, double
; CHECK-DAG: [[AVREG:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s64) = COPY $d1
; CHECK: ADJCALLSTACKDOWN 8, 0, 14 /* CC::al */, $noreg, implicit-def $sp, implicit $sp
-; CHECK-DAG: $r0 = COPY [[AVREG]]
; CHECK-DAG: [[B1:%[0-9]+]]:_(s32), [[B2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BVREG]](s64)
-; CHECK-DAG: $r2 = COPY [[B1]]
-; CHECK-DAG: $r3 = COPY [[B2]]
; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY $sp
; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_PTR_ADD [[SP1]], [[OFF1]](s32)
; CHECK: G_STORE [[BVREG]](s64), [[FI1]](p0){{.*}}store (s64)
+; CHECK-DAG: $r0 = COPY [[AVREG]]
+; CHECK-DAG: $r2 = COPY [[B1]]
+; CHECK-DAG: $r3 = COPY [[B2]]
; ARM: BLX [[FPTRVREG]](p0), csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0
; THUMB: tBLXr 14 /* CC::al */, $noreg, [[FPTRVREG]](p0), csr_aapcs, implicit-def $lr, implicit $sp, implicit $r0, implicit $r2, implicit $r3, implicit-def $r0
; CHECK: [[RVREG:%[0-9]+]]:_(s32) = COPY $r0
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
index 4823ebef14625..b8b5c0cba9938 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/extend_args.ll
@@ -64,15 +64,15 @@ define signext i8 @call_sext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8
; MIPS32-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 8
; MIPS32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @sext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY5]], 8
@@ -100,15 +100,15 @@ define zeroext i8 @call_zext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8
; MIPS32-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 8
; MIPS32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @zext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY5]], 8
@@ -135,15 +135,15 @@ define i8 @call_aext_stack_arg_i8(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i8 %a) {
; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
; MIPS32-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @aext_stack_arg_i8, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32)
@@ -219,15 +219,15 @@ define signext i16 @call_sext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4,
; MIPS32-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 16
; MIPS32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[SEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @sext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY5]], 16
@@ -255,15 +255,15 @@ define zeroext i16 @call_zext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4,
; MIPS32-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 16
; MIPS32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[ZEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @zext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY5]], 16
@@ -290,15 +290,15 @@ define i16 @call_aext_stack_arg_i16(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i16 %a)
; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
; MIPS32-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @aext_stack_arg_i16, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
index 0dc649b3648b5..ad61728827830 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/float_args.ll
@@ -204,8 +204,8 @@ define double @call_double_in_gpr(i32 %a, double %b) {
; FP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
; FP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; FP32-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP32-NEXT: $a0 = COPY [[COPY]](s32)
; FP32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; FP32-NEXT: $a0 = COPY [[COPY]](s32)
; FP32-NEXT: $a2 = COPY [[UV]](s32)
; FP32-NEXT: $a3 = COPY [[UV1]](s32)
; FP32-NEXT: JAL @double_in_gpr, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0
@@ -222,8 +222,8 @@ define double @call_double_in_gpr(i32 %a, double %b) {
; FP64-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $a3
; FP64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; FP64-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp
- ; FP64-NEXT: $a0 = COPY [[COPY]](s32)
; FP64-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+ ; FP64-NEXT: $a0 = COPY [[COPY]](s32)
; FP64-NEXT: $a2 = COPY [[UV]](s32)
; FP64-NEXT: $a3 = COPY [[UV1]](s32)
; FP64-NEXT: JAL @double_in_gpr, csr_o32_fp64, implicit-def $ra, implicit-def $sp, implicit $a0, implicit-def $d0_64
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
index b823f66a64cd9..1cc2e31d86b2d 100644
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/stack_args.ll
@@ -15,14 +15,14 @@ define i32 @g(i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5){
; MIPS32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 8)
; MIPS32-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def $sp, implicit $sp
- ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
- ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
- ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
- ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $sp
; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s32)
; MIPS32-NEXT: G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack + 16, align 8)
+ ; MIPS32-NEXT: $a0 = COPY [[COPY]](s32)
+ ; MIPS32-NEXT: $a1 = COPY [[COPY1]](s32)
+ ; MIPS32-NEXT: $a2 = COPY [[COPY2]](s32)
+ ; MIPS32-NEXT: $a3 = COPY [[COPY3]](s32)
; MIPS32-NEXT: JAL @f, csr_o32, implicit-def $ra, implicit-def $sp, implicit $a0, implicit $a1, implicit $a2, implicit $a3, implicit-def $v0
; MIPS32-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $v0
; MIPS32-NEXT: ADJCALLSTACKUP 24, 0, implicit-def $sp, implicit $sp
diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index 1ccfec531d026..cf15fedffb2b4 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -441,12 +441,6 @@ define void @test_simple_arg8_call(i32 %in0) {
; X64-NEXT: {{ $}}
; X64-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
; X64-NEXT: ADJCALLSTACKDOWN64 16, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
- ; X64-NEXT: $edi = COPY [[COPY]](s32)
- ; X64-NEXT: $esi = COPY [[COPY]](s32)
- ; X64-NEXT: $edx = COPY [[COPY]](s32)
- ; X64-NEXT: $ecx = COPY [[COPY]](s32)
- ; X64-NEXT: $r8d = COPY [[COPY]](s32)
- ; X64-NEXT: $r9d = COPY [[COPY]](s32)
; X64-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $rsp
; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
@@ -455,6 +449,12 @@ define void @test_simple_arg8_call(i32 %in0) {
; X64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; X64-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64)
; X64-NEXT: G_STORE [[COPY]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1)
+ ; X64-NEXT: $edi = COPY [[COPY]](s32)
+ ; X64-NEXT: $esi = COPY [[COPY]](s32)
+ ; X64-NEXT: $edx = COPY [[COPY]](s32)
+ ; X64-NEXT: $ecx = COPY [[COPY]](s32)
+ ; X64-NEXT: $r8d = COPY [[COPY]](s32)
+ ; X64-NEXT: $r9d = COPY [[COPY]](s32)
; X64-NEXT: CALL64pcrel32 @simple_arg8_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d
; X64-NEXT: ADJCALLSTACKUP64 16, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
; X64-NEXT: RET 0
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 7b233b83c49be..2270e4fb406be 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -2149,9 +2149,9 @@ TEST_F(AArch64GISelMITest, LibcallSRem) {
CHECK: $x1 = COPY [[COPY]]
CHECK: BL &__moddi3
CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
+ CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
CHECK: $x0 = COPY [[UV]]
CHECK: $x1 = COPY [[UV1]]
- CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
CHECK: $x2 = COPY [[UV2]]
CHECK: $x3 = COPY [[UV3]]
CHECK: BL &__modti3
@@ -2207,9 +2207,9 @@ TEST_F(AArch64GISelMITest, LibcallURem) {
CHECK: $x1 = COPY [[COPY]]
CHECK: BL &__umoddi3
CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
+ CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
CHECK: $x0 = COPY [[UV]]
CHECK: $x1 = COPY [[UV1]]
- CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ANYEXT]]
CHECK: $x2 = COPY [[UV2]]
CHECK: $x3 = COPY [[UV3]]
CHECK: BL &__umodti3
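
The CHECK churn above all follows one pattern: the physical-register COPY lines that feed each call now appear after the G_STOREs into the outgoing stack slots, immediately before the call instruction. As a minimal standalone sketch of that deferral idea, the toy C++ below collects register copies as thunks and flushes them only after the stack stores are placed; the names and structure are illustrative only, not the real CallLowering interfaces:

    // Standalone toy (not LLVM code): defer register-copy emission until
    // after all stack stores, mirroring the reordering in the diffs above.
    #include <functional>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<std::function<void()>> DelayedRegCopies;

      // Phase 1: walk outgoing args in IR order. Register assignments are
      // captured as thunks instead of being emitted immediately.
      DelayedRegCopies.push_back([] { std::cout << "$vgpr0 = COPY %a\n"; });
      DelayedRegCopies.push_back([] { std::cout << "$vgpr1 = COPY %b\n"; });

      // Stack assignments are still emitted on the spot.
      std::cout << "G_STORE %c into %fixed-stack.0\n";

      // Phase 2: all stack stores are placed; flush the register copies so
      // they immediately precede the call, as the updated CHECK lines expect.
      for (auto &Thunk : DelayedRegCopies)
        Thunk();
      std::cout << "SI_TCRETURN ...\n";
    }

Running the toy prints the stack store first and the copies directly before the call, which is the ordering the rewritten CHECK lines now verify.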