[llvm] r366582 - AMDGPU/GlobalISel: Rewrite lowerFormalArguments
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 07:15:18 PDT 2019
Author: arsenm
Date: Fri Jul 19 07:15:18 2019
New Revision: 366582
URL: http://llvm.org/viewvc/llvm-project?rev=366582&view=rev
Log:
AMDGPU/GlobalISel: Rewrite lowerFormalArguments
This should now handle everything except structs passed as multiple
registers.
I think most of the packing logic should be handled by
handleAssignments, but I'm unclear on what the contract is for
multiple registers. This copies how x86 handles it.
This does change the behavior of the test_sgpr_alignment0 amdgpu_vs
test. I don't think shader arguments should try to follow the type
alignment, which would require repacking the registers. I also don't
think it matters, since the pointers appear to be packed to the
beginning of the argument list anyway.
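As a rough picture of the splitting this relies on, here is a standalone
sketch (hypothetical helper, not LLVM code; the real computation is
TLI.getNumRegistersForCallingConv): each argument is broken into
32-bit-wide part registers by rounding its size up.

#include <cstdio>

// Approximate number of 32-bit-wide part registers an argument occupies.
static unsigned numPartRegisters(unsigned ArgBits, unsigned PartBits = 32) {
  // Round up, so e.g. a 48-bit <3 x i16> still takes two parts.
  return (ArgBits + PartBits - 1) / PartBits;
}

int main() {
  std::printf("i64       -> %u parts\n", numPartRegisters(64));  // 2
  std::printf("<3 x i16> -> %u parts\n", numPartRegisters(48));  // 2 (as <2 x s16> pieces)
  std::printf("<5 x i32> -> %u parts\n", numPartRegisters(160)); // 5
}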
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp?rev=366582&r1=366581&r2=366582&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp Fri Jul 19 07:15:18 2019
@@ -61,12 +61,126 @@ struct OutgoingArgHandler : public CallL
}
};
+struct IncomingArgHandler : public CallLowering::ValueHandler {
+ uint64_t StackUsed = 0;
+
+ IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+ Register getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ Register AddrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(AMDGPUAS::PRIVATE_ADDRESS, 32));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+ StackUsed = std::max(StackUsed, Size + Offset);
+ return AddrReg;
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign &VA) override {
+ markPhysRegUsed(PhysReg);
+
+ if (VA.getLocVT().getSizeInBits() < 32) {
+ // 16-bit types are reported as legal for 32-bit registers. We need to do
+ // a 32-bit copy, and truncate to avoid the verifier complaining about it.
+ auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ return;
+ }
+
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::SExt:
+ case CCValAssign::LocInfo::ZExt:
+ case CCValAssign::LocInfo::AExt: {
+ auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
+ MIRBuilder.buildTrunc(ValVReg, Copy);
+ break;
+ }
+ default:
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ break;
+ }
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ // FIXME: Get alignment
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 1);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ /// How the physical register gets marked varies between formal
+ /// parameters (it's a basic-block live-in), and a call instruction
+ /// (it's an implicit-def of the BL).
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+ // FIXME: What is the point of this being a callback?
+ bool isArgumentHandler() const override { return true; }
+};
+
+struct FormalArgHandler : public IncomingArgHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ }
+};
+
}
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
+void AMDGPUCallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
+ SplitArgTy PerformArgSplit) const {
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ if (OrigArg.Ty->isVoidTy())
+ return;
+
+ SmallVector<EVT, 4> SplitVTs;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
+
+ EVT VT = SplitVTs[0];
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+
+ if (NumParts == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx),
+ OrigArg.Flags, OrigArg.IsFixed);
+ return;
+ }
+
+ LLT LLTy = getLLTForType(*OrigArg.Ty, DL);
+ SmallVector<Register, 8> SplitRegs;
+
+ EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
+ Type *PartTy = PartVT.getTypeForEVT(Ctx);
+ LLT PartLLT = getLLTForType(*PartTy, DL);
+
+ // FIXME: Should we be reporting all of the part registers for a single
+ // argument, and let handleAssignments take care of the repacking?
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
+ SplitRegs.push_back(PartReg);
+ SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
+ }
+
+ PerformArgSplit(SplitRegs, LLTy, PartLLT);
+}
+
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val,
ArrayRef<Register> VRegs) const {
@@ -156,48 +270,6 @@ void AMDGPUCallLowering::lowerParameter(
MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
-static Register findFirstFreeSGPR(CCState &CCInfo) {
- unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
- if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
- return AMDGPU::SGPR0 + Reg;
- }
- }
- llvm_unreachable("Cannot allocate sgpr");
-}
-
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
- const LLT S32 = LLT::scalar(32);
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (Info.hasWorkItemIDX()) {
- Register Reg = AMDGPU::VGPR0;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
- }
-
- if (Info.hasWorkItemIDY()) {
- Register Reg = AMDGPU::VGPR1;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
- }
-
- if (Info.hasWorkItemIDZ()) {
- Register Reg = AMDGPU::VGPR2;
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
-
- CCInfo.AllocateReg(Reg);
- Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
- }
-}
-
// Allocate special inputs passed in user SGPRs.
static void allocateHSAUserSGPRs(CCState &CCInfo,
MachineIRBuilder &MIRBuilder,
@@ -250,60 +322,6 @@ static void allocateHSAUserSGPRs(CCState
// these from the dispatch pointer.
}
-static void allocateSystemSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- SIMachineFunctionInfo &Info,
- CallingConv::ID CallConv,
- bool IsShader) {
- const LLT S32 = LLT::scalar(32);
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- if (Info.hasWorkGroupIDX()) {
- Register Reg = Info.addWorkGroupIDX();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDY()) {
- Register Reg = Info.addWorkGroupIDY();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupIDZ()) {
- unsigned Reg = Info.addWorkGroupIDZ();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasWorkGroupInfo()) {
- unsigned Reg = Info.addWorkGroupInfo();
- MRI.setType(MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass), S32);
- CCInfo.AllocateReg(Reg);
- }
-
- if (Info.hasPrivateSegmentWaveByteOffset()) {
- // Scratch wave offset passed in system SGPR.
- unsigned PrivateSegmentWaveByteOffsetReg;
-
- if (IsShader) {
- PrivateSegmentWaveByteOffsetReg =
- Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
-
- // This is true if the scratch wave byte offset doesn't have a fixed
- // location.
- if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
- PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
- }
- } else
- PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
-
- MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
- CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
- }
-}
-
bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
@@ -311,7 +329,9 @@ bool AMDGPUCallLowering::lowerFormalArgu
const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+
const DataLayout &DL = F.getParent()->getDataLayout();
SmallVector<CCValAssign, 16> ArgLocs;
@@ -349,117 +369,228 @@ bool AMDGPUCallLowering::lowerFormalArgu
++i;
}
- allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
- allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
+ TLI.allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), false);
return true;
}
+static void packSplitRegsToOrigType(MachineIRBuilder &MIRBuilder,
+ ArrayRef<Register> OrigRegs,
+ ArrayRef<Register> Regs,
+ LLT LLTy,
+ LLT PartLLT) {
+ if (!LLTy.isVector() && !PartLLT.isVector()) {
+ MIRBuilder.buildMerge(OrigRegs[0], Regs);
+ return;
+ }
+
+ if (LLTy.isVector() && PartLLT.isVector()) {
+ assert(LLTy.getElementType() == PartLLT.getElementType());
+
+ int DstElts = LLTy.getNumElements();
+ int PartElts = PartLLT.getNumElements();
+ if (DstElts % PartElts == 0)
+ MIRBuilder.buildConcatVectors(OrigRegs[0], Regs);
+ else {
+ // Deal with v3s16 split into v2s16
+ assert(PartElts == 2 && DstElts % 2 != 0);
+ int RoundedElts = PartElts * ((DstElts + PartElts - 1) / PartElts);
+
+ LLT RoundedDestTy = LLT::vector(RoundedElts, PartLLT.getElementType());
+ auto RoundedConcat = MIRBuilder.buildConcatVectors(RoundedDestTy, Regs);
+ MIRBuilder.buildExtract(OrigRegs[0], RoundedConcat, 0);
+ }
+
+ return;
+ }
+
+ assert(LLTy.isVector() && !PartLLT.isVector());
+
+ LLT DstEltTy = LLTy.getElementType();
+ if (DstEltTy == PartLLT) {
+ // Vector was trivially scalarized.
+ MIRBuilder.buildBuildVector(OrigRegs[0], Regs);
+ } else if (DstEltTy.getSizeInBits() > PartLLT.getSizeInBits()) {
+ // Deal with vector with 64-bit elements decomposed to 32-bit
+ // registers. Need to create intermediate 64-bit elements.
+ SmallVector<Register, 8> EltMerges;
+ int PartsPerElt = DstEltTy.getSizeInBits() / PartLLT.getSizeInBits();
+
+ assert(DstEltTy.getSizeInBits() % PartLLT.getSizeInBits() == 0);
+
+ for (int I = 0, NumElts = LLTy.getNumElements(); I != NumElts; ++I) {
+ auto Merge = MIRBuilder.buildMerge(DstEltTy,
+ Regs.take_front(PartsPerElt));
+ EltMerges.push_back(Merge.getReg(0));
+ Regs = Regs.drop_front(PartsPerElt);
+ }
+
+ MIRBuilder.buildBuildVector(OrigRegs[0], EltMerges);
+ } else {
+ // Vector was split, and elements promoted to a wider type.
+ LLT BVType = LLT::vector(LLTy.getNumElements(), PartLLT);
+ auto BV = MIRBuilder.buildBuildVector(BVType, Regs);
+ MIRBuilder.buildTrunc(OrigRegs[0], BV);
+ }
+}
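For reference, the rounding in the v3s16-from-v2s16 case above boils down
to the following standalone sketch (hypothetical names, not LLVM code):
the parts are concatenated into a rounded-up vector and the original
width is then extracted back out.

#include <cassert>

// Number of elements in the rounded-up concat vector when DstElts is not
// a multiple of PartElts (e.g. <3 x s16> rebuilt from <2 x s16> parts).
static int roundedElts(int DstElts, int PartElts) {
  return PartElts * ((DstElts + PartElts - 1) / PartElts);
}

int main() {
  assert(roundedElts(3, 2) == 4); // concat to <4 x s16>, then extract <3 x s16>
  assert(roundedElts(5, 2) == 6); // <5 x s16> would round to <6 x s16>
  assert(roundedElts(4, 2) == 4); // evenly divisible: plain concat, no extract
}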
+
bool AMDGPUCallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
+ CallingConv::ID CC = F.getCallingConv();
+
// The infrastructure for normal calling convention lowering is essentially
// useless for kernels. We want to avoid any kind of legalization or argument
// splitting.
- if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+ if (CC == CallingConv::AMDGPU_KERNEL)
return lowerFormalArgumentsKernel(MIRBuilder, F, VRegs);
// AMDGPU_GS and AMDGPU_HS are not supported yet.
- if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
- F.getCallingConv() == CallingConv::AMDGPU_HS)
+ if (CC == CallingConv::AMDGPU_GS || CC == CallingConv::AMDGPU_HS)
return false;
+ const bool IsShader = AMDGPU::isShader(CC);
+ const bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CC);
+
MachineFunction &MF = MIRBuilder.getMF();
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+ const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
const DataLayout &DL = F.getParent()->getDataLayout();
- bool IsShader = AMDGPU::isShader(F.getCallingConv());
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
if (Info->hasImplicitBufferPtr()) {
- unsigned ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
+ Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
- unsigned NumArgs = F.arg_size();
- Function::const_arg_iterator CurOrigArg = F.arg_begin();
- const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+
+ SmallVector<ArgInfo, 32> SplitArgs;
+ unsigned Idx = 0;
unsigned PSInputNum = 0;
- BitVector Skipped(NumArgs);
- for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
- EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());
-
- // We can only hanlde simple value types at the moment.
- ISD::ArgFlagsTy Flags;
- assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
- ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
- setArgFlags(OrigArg, i + 1, DL, F);
- Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
-
- if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
- !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
- PSInputNum <= 15) {
- if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
- Skipped.set(i);
- ++PSInputNum;
- continue;
- }
- Info->markPSInputAllocated(PSInputNum);
- if (!CurOrigArg->use_empty())
- Info->markPSInputEnabled(PSInputNum);
+ for (auto &Arg : F.args()) {
+ if (DL.getTypeStoreSize(Arg.getType()) == 0)
+ continue;
+
+ const bool InReg = Arg.hasAttribute(Attribute::InReg);
+
+ // SGPR arguments to functions not implemented.
+ if (!IsShader && InReg)
+ return false;
+
+ // TODO: Handle multiple registers and sret.
+ if (Arg.hasAttribute(Attribute::StructRet) ||
+ Arg.hasAttribute(Attribute::SwiftSelf) ||
+ Arg.hasAttribute(Attribute::SwiftError) ||
+ Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
+ return false;
+
+ if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
+ const bool ArgUsed = !Arg.use_empty();
+ bool SkipArg = !ArgUsed && !Info->isPSInputAllocated(PSInputNum);
+
+ if (!SkipArg) {
+ Info->markPSInputAllocated(PSInputNum);
+ if (ArgUsed)
+ Info->markPSInputEnabled(PSInputNum);
+ }
++PSInputNum;
+
+ if (SkipArg) {
+ MIRBuilder.buildUndef(VRegs[Idx][0]);
+ ++Idx;
+ continue;
+ }
}
- CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
- /*IsVarArg=*/false);
+ ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+ setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC,
+ // FIXME: We should probably be passing multiple registers to
+ // handleAssignments to do this
+ [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
+ packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT);
+ });
+
+ ++Idx;
+ }
+
+ // At least one interpolation mode must be enabled or else the GPU will
+ // hang.
+ //
+ // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+ // set PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, so we shouldn't do anything here and the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
+ if (CC == CallingConv::AMDGPU_PS) {
+ if ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
+ }
- if (ValEVT.isVector()) {
- EVT ElemVT = ValEVT.getVectorElementType();
- if (!ValEVT.isSimple())
- return false;
- MVT ValVT = ElemVT.getSimpleVT();
- bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
- OrigArg.Flags, CCInfo);
- if (!Res)
- return false;
- } else {
- MVT ValVT = ValEVT.getSimpleVT();
- if (!ValEVT.isSimple())
- return false;
- bool Res =
- AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);
-
- // Fail if we don't know how to handle this type.
- if (Res)
- return false;
+ if (Subtarget.isAmdPalOS()) {
+ // For isAmdPalOS, the user does not enable some bits after compilation
+ // based on run-time states; the register values being generated here are
+ // the final ones set in hardware. Therefore we need to apply the
+ // workaround to PSInputAddr and PSInputEnable together. (The case where
+ // a bit is set in PSInputAddr but not PSInputEnable is where the frontend
+ // set up an input arg for a particular interpolation mode, but nothing
+ // uses that input arg. Really we should have an earlier pass that removes
+ // such an arg.)
+ unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
+ if ((PsInputBits & 0x7F) == 0 ||
+ ((PsInputBits & 0xF) == 0 &&
+ (PsInputBits >> 11 & 1)))
+ Info->markPSInputEnabled(
+ countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
}
}
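For reference, the check above reduces to the following standalone
predicate (hypothetical helper, not LLVM code): force-enable an
interpolation mode when no PERSP_* or LINEAR_* input is enabled, or when
POS_W_FLOAT is enabled without any PERSP_* input.

#include <cassert>
#include <cstdint>

// PERSP_* are bits 0-3 (0xF), LINEAR_* are bits 4-6 (0x70), POS_W_FLOAT is
// bit 11, matching the masks used in the code above.
static bool needsPSInputWorkaround(uint32_t InputBits) {
  return (InputBits & 0x7F) == 0 ||
         ((InputBits & 0xF) == 0 && ((InputBits >> 11) & 1));
}

int main() {
  assert(needsPSInputWorkaround(0));         // nothing enabled
  assert(needsPSInputWorkaround(1u << 11));  // POS_W_FLOAT without PERSP_*
  assert(!needsPSInputWorkaround(0x1));      // PERSP_SAMPLE enabled
}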
- Function::const_arg_iterator Arg = F.arg_begin();
+ const SITargetLowering &TLI = *getTLI<SITargetLowering>();
+ CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CC, F.isVarArg());
- if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
- F.getCallingConv() == CallingConv::AMDGPU_PS) {
- for (unsigned i = 0, OrigArgIdx = 0;
- OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
- if (Skipped.test(OrigArgIdx))
- continue;
- assert(VRegs[OrigArgIdx].size() == 1 &&
- "Can't lower into more than 1 reg");
- CCValAssign &VA = ArgLocs[i++];
- MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
- MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
- MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
- }
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
- allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
- return true;
+ FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+ if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, SplitArgs, Handler))
+ return false;
+
+ if (!IsEntryFunc) {
+ // Special inputs come after user arguments.
+ TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
+ // Start adding system SGPRs.
+ if (IsEntryFunc) {
+ TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsShader);
+ } else {
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
+ CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+ CCInfo.AllocateReg(Info->getFrameOffsetReg());
+ TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
- return false;
+ // Move back to the end of the basic block.
+ MIRBuilder.setMBB(MBB);
+
+ return true;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h?rev=366582&r1=366581&r2=366582&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h Fri Jul 19 07:15:18 2019
@@ -29,7 +29,16 @@ class AMDGPUCallLowering: public CallLow
uint64_t Offset, unsigned Align,
Register DstReg) const;
- public:
+ /// A function of this type is used to perform value split action.
+ using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT)>;
+
+ void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ CallingConv::ID CallConv,
+ SplitArgTy SplitArg) const;
+
+public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=366582&r1=366581&r2=366582&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jul 19 07:15:18 2019
@@ -1596,29 +1596,32 @@ static void processShaderInputArgs(Small
}
// Allocate special inputs passed in VGPRs.
-static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
+ const LLT S32 = LLT::scalar(32);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
if (Info.hasWorkItemIDX()) {
- unsigned Reg = AMDGPU::VGPR0;
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register Reg = AMDGPU::VGPR0;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
}
if (Info.hasWorkItemIDY()) {
- unsigned Reg = AMDGPU::VGPR1;
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register Reg = AMDGPU::VGPR1;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
}
if (Info.hasWorkItemIDZ()) {
- unsigned Reg = AMDGPU::VGPR2;
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ Register Reg = AMDGPU::VGPR2;
+ MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
@@ -1678,10 +1681,10 @@ static ArgDescriptor allocateSGPR64Input
return allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
}
-static void allocateSpecialInputVGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
const unsigned Mask = 0x3ff;
ArgDescriptor Arg;
@@ -1699,10 +1702,11 @@ static void allocateSpecialInputVGPRs(CC
Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
}
-static void allocateSpecialInputSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateSpecialInputSGPRs(
+ CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
// TODO: Unify handling with private memory pointers.
@@ -1735,10 +1739,10 @@ static void allocateSpecialInputSGPRs(CC
}
// Allocate special inputs passed in user SGPRs.
-static void allocateHSAUserSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const {
if (Info.hasImplicitBufferPtr()) {
unsigned ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
@@ -1765,9 +1769,12 @@ static void allocateHSAUserSGPRs(CCState
}
if (Info.hasKernargSegmentPtr()) {
- unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI);
- MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
CCInfo.AllocateReg(InputPtrReg);
+
+ Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
+ MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
}
if (Info.hasDispatchID()) {
@@ -1787,11 +1794,11 @@ static void allocateHSAUserSGPRs(CCState
}
// Allocate special input registers that are initialized per-wave.
-static void allocateSystemSGPRs(CCState &CCInfo,
- MachineFunction &MF,
- SIMachineFunctionInfo &Info,
- CallingConv::ID CallConv,
- bool IsShader) {
+void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ CallingConv::ID CallConv,
+ bool IsShader) const {
if (Info.hasWorkGroupIDX()) {
unsigned Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=366582&r1=366581&r2=366582&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Fri Jul 19 07:15:18 2019
@@ -375,6 +375,33 @@ public:
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
unsigned getPrefLoopAlignment(MachineLoop *ML) const override;
+
+
+ void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
+ void allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ CallingConv::ID CallConv,
+ bool IsShader) const;
+
+ void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+ void allocateSpecialInputSGPRs(
+ CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
+
+ void allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) const;
};
} // End namespace llvm
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll?rev=366582&r1=366581&r2=366582&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll Fri Jul 19 07:15:18 2019
@@ -1,4 +1,3 @@
-; XFAIL: *
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
@@ -19,18 +18,22 @@ define amdgpu_vs void @test_f32(float %a
}
; CHECK-LABEL: name: test_ptr2_inreg
-; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
-; CHECK: G_LOAD [[S01]]
+; CHECK: [[S2:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[S3:%[0-9]+]]:_(s32) = COPY $sgpr3
+; CHECK: [[PTR:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S2]](s32), [[S3]](s32)
+; CHECK: G_LOAD [[PTR]]
define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg0
ret void
}
; CHECK-LABEL: name: test_sgpr_alignment0
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: [[S23:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
-; CHECK: G_LOAD [[S23]]
-; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
+; CHECK: [[S2:%[0-9]+]]:_(s32) = COPY $sgpr2
+; CHECK: [[S3:%[0-9]+]]:_(s32) = COPY $sgpr3
+; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4
+; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32)
+; CHECK: G_LOAD [[S34]]
+; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S2]]
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll?rev=366582&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll Fri Jul 19 07:15:18 2019
@@ -0,0 +1,1984 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o - %s 2> %t | FileCheck %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+; ERR-NOT: remark
+; ERR: remark: <unknown>:0:0: unable to lower arguments: void ({ i8, i32 })* (in function: void_func_struct_i8_i32)
+; ERR-NOT: remark
+
+define void @void_func_i1(i1 %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i1
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store 1 into `i1 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store i1 %arg0, i1 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i1_zeroext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %ext = zext i1 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i1_signext(i1 signext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i1_signext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+ ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %ext = sext i1 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @i1_arg_i1_use(i1 %arg) #0 {
+ ; CHECK-LABEL: name: i1_arg_i1_use
+ ; CHECK: bb.1.bb:
+ ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[TRUNC]], [[C]]
+ ; CHECK: [[INT:%[0-9]+]]:_(s1), [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), [[XOR]](s1)
+ ; CHECK: G_BRCOND [[INT]](s1), %bb.2
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2.bb1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: bb.3.bb2:
+ ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64)
+ ; CHECK: S_ENDPGM 0
+bb:
+ br i1 %arg, label %bb2, label %bb1
+
+bb1:
+ store volatile i32 0, i32 addrspace(1)* undef
+ br label %bb2
+
+bb2:
+ ret void
+}
+
+define void @void_func_i8(i8 %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i8
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store i8 %arg0, i8 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i8_zeroext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %ext = zext i8 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i8_signext(i8 signext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i8_signext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+ ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %ext = sext i8 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i16(i16 %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `i16 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store i16 %arg0, i16 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i16_zeroext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]]
+ ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %ext = zext i16 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i16_signext(i16 signext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i16_signext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16)
+ ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]]
+ ; CHECK: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %ext = sext i16 %arg0 to i32
+ %add = add i32 %ext, 12
+ store i32 %add, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i32(i32 %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store i32 %arg0, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_i64(i64 %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store i64 %arg0, i64 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_f16(half %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store 2 into `half addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store half %arg0, half addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_f32(float %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store float %arg0, float addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_f64(double %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[MV]](s64), [[DEF]](p1) :: (store 8 into `double addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store double %arg0, double addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v2i32(<2 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v2i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v3i32(<3 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v3i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v4i32(<4 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v4i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v5i32(<5 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v5i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store 20 into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v8i32(<8 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v8i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v16i32(<16 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32(<32 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ ret void
+}
+
+; 1 over register limit
+define void @void_func_v33i32(<33 x i32> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v33i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[LOAD]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store 132 into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v2i64(<2 x i64> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v2i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v3i64(<3 x i64> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v3i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v4i64(<4 x i64> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v4i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x i64> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v5i64(<5 x i64> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v5i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store 40 into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v8i64(<8 x i64> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v8i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x i64> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v16i64(<16 x i64> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
+ ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
+ ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
+ ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x i64> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v2i16(<2 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v2i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v3i16(<3 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v3i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v4i16(<4 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v4i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x i16> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v5i16(<5 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v5i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<5 x s16>) = G_TRUNC [[BUILD_VECTOR]](<5 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<5 x s16>), [[DEF]](p1) :: (store 10 into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v8i16(<8 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v8i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x i16> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v16i16(<16 x i16> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x i16> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v2f32(<2 x float> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v2f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <2 x float> %arg0, <2 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v3f32(<3 x float> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v3f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store 12 into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <3 x float> %arg0, <3 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v4f32(<4 x float> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v4f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <4 x float> %arg0, <4 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v8f32(<8 x float> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v8f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <8 x float> %arg0, <8 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v16f32(<16 x float> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <16 x float> %arg0, <16 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v2f64(<2 x double> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v2f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <2 x double> %arg0, <2 x double> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v3f64(<3 x double> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v3f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store 24 into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <3 x double> %arg0, <3 x double> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v4f64(<4 x double> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v4f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store 32 into `<4 x double> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <4 x double> %arg0, <4 x double> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v8f64(<8 x double> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v8f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store 64 into `<8 x double> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <8 x double> %arg0, <8 x double> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v16f64(<16 x double> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY24]](s32), [[COPY25]](s32)
+ ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY26]](s32), [[COPY27]](s32)
+ ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32)
+ ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store 128 into `<16 x double> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <16 x double> %arg0, <16 x double> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v2f16(<2 x half> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v2f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF]](p1) :: (store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <2 x half> %arg0, <2 x half> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v3f16(<3 x half> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v3f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<3 x s16>), [[DEF]](p1) :: (store 6 into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <3 x half> %arg0, <3 x half> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v4f16(<4 x half> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v4f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<4 x s16>), [[DEF]](p1) :: (store 8 into `<4 x half> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <4 x half> %arg0, <4 x half> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v8f16(<8 x half> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v8f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s16>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<8 x s16>), [[DEF]](p1) :: (store 16 into `<8 x half> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <8 x half> %arg0, <8 x half> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v16f16(<16 x half> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<16 x s16>), [[DEF]](p1) :: (store 32 into `<16 x half> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store <16 x half> %arg0, <16 x half> addrspace(1)* undef
+ ret void
+}
+
+; Make sure there is no alignment requirement for passed vgprs.
+define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_i32_i64_i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[MV]](s64), [[DEF1]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile i32 %arg0, i32 addrspace(1)* undef
+ store volatile i64 %arg1, i64 addrspace(1)* undef
+ store volatile i32 %arg2, i32 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_struct_i32({ i32 } %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_struct_i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `{ i32 } addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store { i32 } %arg0, { i32 } addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_struct_i8_i32
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: bb.1 (%ir-block.0):
+ store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_byval_struct_i8_i32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (load 1 from %ir.arg0, align 4, addrspace 5)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[LOAD]], [[C]](s32)
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (load 4 from %ir.arg0 + 4, addrspace 5)
+ ; CHECK: G_STORE [[LOAD1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK: [[GEP1:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64)
+ ; CHECK: G_STORE [[LOAD2]](s32), [[GEP1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
+ store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval %arg0, { i8, i32 } addrspace(5)* byval %arg1, i32 %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile load 1 from %ir.arg0, align 4, addrspace 5)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[GEP:%[0-9]+]]:_(p5) = G_GEP [[LOAD]], [[C]](s32)
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p5) :: (volatile load 4 from %ir.arg0 + 4, addrspace 5)
+ ; CHECK: [[LOAD4:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (volatile load 1 from %ir.arg1, align 4, addrspace 5)
+ ; CHECK: [[GEP1:%[0-9]+]]:_(p5) = G_GEP [[LOAD1]], [[C]](s32)
+ ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p5) :: (volatile load 4 from %ir.arg1 + 4, addrspace 5)
+ ; CHECK: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK: [[GEP2:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64)
+ ; CHECK: G_STORE [[LOAD3]](s32), [[GEP2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+ ; CHECK: G_STORE [[LOAD4]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK: [[GEP3:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C1]](s64)
+ ; CHECK: G_STORE [[LOAD5]](s32), [[GEP3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK: S_ENDPGM 0
+ %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
+ %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1
+ store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
+ store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef
+ store volatile i32 %arg2, i32 addrspace(3)* undef
+ ret void
+}
+
+define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval %arg0, i64 addrspace(5)* byval %arg1) #0 {
+ ; CHECK-LABEL: name: void_func_byval_i32_byval_i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (load 4 from %ir.arg0, addrspace 5)
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (load 8 from %ir.arg1, addrspace 5)
+ ; CHECK: G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[LOAD3]](s64), [[DEF1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ %arg0.load = load i32, i32 addrspace(5)* %arg0
+ %arg1.load = load i64, i64 addrspace(5)* %arg1
+ store i32 %arg0.load, i32 addrspace(1)* undef
+ store i64 %arg1.load, i64 addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_i32_i64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD1]](s32), [[LOAD2]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[LOAD]](s32), [[DEF1]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[MV]](s64), [[DEF2]](p1) :: (volatile store 8 into `i64 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile i32 %arg1, i32 addrspace(1)* undef
+ store volatile i64 %arg2, i64 addrspace(1)* undef
+ ret void
+}
+
+; FIXME: Different ext load types on CI vs. VI
+define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 1 from %fixed-stack.3, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 1 from %fixed-stack.2, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 2 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 2 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF3:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF4:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[LOAD]](s1), [[DEF1]](p1) :: (volatile store 1 into `i1 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[LOAD1]](s8), [[DEF2]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[LOAD2]](s16), [[DEF3]](p1) :: (volatile store 2 into `i16 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[LOAD3]](s16), [[DEF4]](p1) :: (volatile store 2 into `half addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile i1 %arg1, i1 addrspace(1)* undef
+ store volatile i8 %arg2, i8 addrspace(1)* undef
+ store volatile i16 %arg3, i16 addrspace(1)* undef
+ store volatile half %arg4, half addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF1]](p1) :: (volatile store 8 into `<2 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF2]](p1) :: (volatile store 8 into `<2 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef
+ store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[LOAD3]](s32)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[TRUNC]](<2 x s16>), [[DEF1]](p1) :: (volatile store 4 into `<2 x i16> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[TRUNC1]](<2 x s16>), [[DEF2]](p1) :: (volatile store 4 into `<2 x half> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef
+ store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF1]](p1) :: (volatile store 16 into `<2 x i64> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF2]](p1) :: (volatile store 16 into `<2 x double> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef
+ store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF1]](p1) :: (volatile store 16 into `<4 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF2]](p1) :: (volatile store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef
+ store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.14, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.12, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+ ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+ ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.10, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+ ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+ ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF1]](p1) :: (volatile store 32 into `<8 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF2]](p1) :: (volatile store 32 into `<8 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef
+ store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef
+ ret void
+}
+
+define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.31, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.30, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.29, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.28, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27
+ ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.27, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26
+ ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.26, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25
+ ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.25, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24
+ ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.24, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23
+ ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.23, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22
+ ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.22, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21
+ ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.21, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20
+ ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.20, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19
+ ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.19, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18
+ ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.18, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17
+ ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.17, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16
+ ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.16, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+ ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.15, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+ ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.14, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+ ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.13, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+ ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.12, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+ ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.11, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+ ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.10, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+ ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.9, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+ ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.8, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+ ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF2:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF1]](p1) :: (volatile store 64 into `<16 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[DEF2]](p1) :: (volatile store 64 into `<16 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef
+ store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef
+ ret void
+}
+
+; Make sure v3 isn't wasted just because 3-element vector types are promoted to 4 elements
+define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
+ ; CHECK-LABEL: name: void_func_v3f32_wasted_reg
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+ ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
+ ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
+ ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3)
+ ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3)
+ ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `float addrspace(3)* undef`, addrspace 3)
+ ; CHECK: G_STORE [[COPY3]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK: S_ENDPGM 0
+ %arg0.0 = extractelement <3 x float> %arg0, i32 0
+ %arg0.1 = extractelement <3 x float> %arg0, i32 1
+ %arg0.2 = extractelement <3 x float> %arg0, i32 2
+ store volatile float %arg0.0, float addrspace(3)* undef
+ store volatile float %arg0.1, float addrspace(3)* undef
+ store volatile float %arg0.2, float addrspace(3)* undef
+ store volatile i32 %arg1, i32 addrspace(3)* undef
+ ret void
+}
+
+define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
+ ; CHECK-LABEL: name: void_func_v3i32_wasted_reg
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32)
+ ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32)
+ ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32)
+ ; CHECK: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+ ; CHECK: S_ENDPGM 0
+ %arg0.0 = extractelement <3 x i32> %arg0, i32 0
+ %arg0.1 = extractelement <3 x i32> %arg0, i32 1
+ %arg0.2 = extractelement <3 x i32> %arg0, i32 2
+ store volatile i32 %arg0.0, i32 addrspace(3)* undef
+ store volatile i32 %arg0.1, i32 addrspace(3)* undef
+ store volatile i32 %arg0.2, i32 addrspace(3)* undef
+ store volatile i32 %arg1, i32 addrspace(3)* undef
+ ret void
+}
+
+; Check there is no crash.
+define void @void_func_v16i8(<16 x i8> %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_v16i8
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef
+ ret void
+}
+
+; Check there is no crash.
+define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
+ ; CHECK-LABEL: name: void_func_v32i32_v16i8
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24
+ ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25
+ ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26
+ ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27
+ ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28
+ ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29
+ ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30
+ ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.15, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14
+ ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.14, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13
+ ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.13, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12
+ ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.12, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11
+ ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.11, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10
+ ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.10, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9
+ ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.9, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8
+ ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.8, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7
+ ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.7, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6
+ ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.6, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5
+ ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.5, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4
+ ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.4, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3
+ ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.3, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
+ ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.2, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
+ ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.1, align 1, addrspace 5)
+ ; CHECK: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
+ ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.0, align 1, addrspace 5)
+ ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
+ ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s32>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store 128 into `<32 x i32> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[TRUNC]](<16 x s8>), [[DEF1]](p1) :: (volatile store 16 into `<16 x i8> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: S_ENDPGM 0
+ store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef
+ store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }