[llvm] ac0b9b4 - AMDPGPU/GlobalISel: Select more MUBUF global addressing modes
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 07:28:49 PST 2020
Author: Matt Arsenault
Date: 2020-01-27T07:28:36-08:00
New Revision: ac0b9b4ccf3e356061f66f54b99588bc71071e73
URL: https://github.com/llvm/llvm-project/commit/ac0b9b4ccf3e356061f66f54b99588bc71071e73
DIFF: https://github.com/llvm/llvm-project/commit/ac0b9b4ccf3e356061f66f54b99588bc71071e73.diff
LOG: AMDPGPU/GlobalISel: Select more MUBUF global addressing modes
The handling of the high bits of the resource descriptor seem weird to
me, where the 3rd dword changes based on the instruction.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 5cbbc283821f..a02555e6a7a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -88,6 +88,9 @@ def gi_mubuf_addr64 :
GIComplexOperandMatcher<s64, "selectMUBUFAddr64">,
GIComplexPatternEquiv<MUBUFAddr64>;
+def gi_mubuf_offset :
+ GIComplexOperandMatcher<s64, "selectMUBUFOffset">,
+ GIComplexPatternEquiv<MUBUFOffset>;
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c580e72aefbb..4b972ce9b03e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2552,32 +2552,51 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
}};
}
+/// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
+/// the base value with the constant offset. There may be intervening copies
+/// between \p Root and the identified constant. Returns \p Root, 0 if this does
+/// not match the pattern.
+std::pair<Register, int64_t>
+AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
+ Register Root, const MachineRegisterInfo &MRI) const {
+ MachineInstr *RootI = MRI.getVRegDef(Root);
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return {Root, 0};
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ Optional<ValueAndVReg> MaybeOffset
+ = getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true);
+ if (!MaybeOffset)
+ return {Root, 0};
+ return {RootI->getOperand(1).getReg(), MaybeOffset->Value};
+}
+
static void addZeroImm(MachineInstrBuilder &MIB) {
MIB.addImm(0);
}
/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If \p
-/// BasePtr is not valid, a null base pointer will be ussed.
-static Register buildRSrc(MachineInstr *MI, MachineRegisterInfo &MRI,
- const SIInstrInfo &TII, Register BasePtr) {
+/// BasePtr is not valid, a null base pointer will be used.
+static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ uint32_t FormatLo, uint32_t FormatHi,
+ Register BasePtr) {
Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
- const DebugLoc &DL = MI->getDebugLoc();
- MachineBasicBlock *BB = MI->getParent();
-
- // TODO: Try to use a real pointer if available.
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_MOV_B32), RSrc2)
- .addImm(0);
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_MOV_B32), RSrc3)
- .addImm(TII.getDefaultRsrcDataFormat() >> 32);
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(RSrc2)
+ .addImm(FormatLo);
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(RSrc3)
+ .addImm(FormatHi);
// Build the half of the subregister with the constants before building the
// full 128-bit register. If we are building multiple resource descriptors,
// this will allow CSEing of the 2-component register.
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::REG_SEQUENCE), RSrcHi)
+ B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(RSrcHi)
.addReg(RSrc2)
.addImm(AMDGPU::sub0)
.addReg(RSrc3)
@@ -2586,11 +2605,13 @@ static Register buildRSrc(MachineInstr *MI, MachineRegisterInfo &MRI,
Register RSrcLo = BasePtr;
if (!BasePtr) {
RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_MOV_B64), RSrcLo)
+ B.buildInstr(AMDGPU::S_MOV_B64)
+ .addDef(RSrcLo)
.addImm(0);
}
- BuildMI(*BB, MI, DL, TII.get(AMDGPU::REG_SEQUENCE), RSrc)
+ B.buildInstr(AMDGPU::REG_SEQUENCE)
+ .addDef(RSrc)
.addReg(RSrcLo)
.addImm(AMDGPU::sub0_sub1)
.addReg(RSrcHi)
@@ -2599,6 +2620,82 @@ static Register buildRSrc(MachineInstr *MI, MachineRegisterInfo &MRI,
return RSrc;
}
+static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ const SIInstrInfo &TII, Register BasePtr) {
+ uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
+
+ // FIXME: Why are half the "default" bits ignored based on the addressing
+ // mode?
+ return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
+}
+
+static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ const SIInstrInfo &TII, Register BasePtr) {
+ uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
+
+ // FIXME: Why are half the "default" bits ignored based on the addressing
+ // mode?
+ return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
+}
+
+AMDGPUInstructionSelector::MUBUFAddressData
+AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
+ MUBUFAddressData Data;
+ Data.N0 = Src;
+
+ Register PtrBase;
+ int64_t Offset;
+
+ std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
+ if (isUInt<32>(Offset)) {
+ Data.N0 = PtrBase;
+ Data.Offset = Offset;
+ }
+
+ if (MachineInstr *InputAdd
+ = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
+ Data.N2 = InputAdd->getOperand(1).getReg();
+ Data.N3 = InputAdd->getOperand(2).getReg();
+
+ // FIXME: Need to fix extra SGPR->VGPRcopies inserted
+ // FIXME: Don't know this was defined by operand 0
+ //
+ // TODO: Remove this when we have copy folding optimizations after
+ // RegBankSelect.
+ Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg();
+ Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg();
+ }
+
+ return Data;
+}
+
+/// Return if the addr64 mubuf mode should be used for the given address.
+bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
+ // (ptr_add N2, N3) -> addr64, or
+ // (ptr_add (ptr_add N2, N3), C1) -> addr64
+ if (Addr.N2)
+ return true;
+
+ const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
+ return N0Bank->getID() == AMDGPU::VGPRRegBankID;
+}
+
+/// Split an immediate offset \p ImmOffset depending on whether it fits in the
+/// immediate field. Modifies \p ImmOffset and sets \p SOffset to the variable
+/// component.
+void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
+ MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
+ if (SIInstrInfo::isLegalMUBUFImmOffset(ImmOffset))
+ return;
+
+ // Illegal offset, store it in soffset.
+ SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ B.buildInstr(AMDGPU::S_MOV_B32)
+ .addDef(SOffset)
+ .addImm(ImmOffset);
+ ImmOffset = 0;
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
// FIXME: Predicates should stop this from reaching here.
@@ -2606,22 +2703,108 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
if (!STI.hasAddr64() || STI.useFlatForGlobal())
return {};
- MachineInstr *MI = MRI->getVRegDef(Root.getReg());
- Register VAddr = Root.getReg();
- int64_t Offset = 0;
+ MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
+ if (!shouldUseAddr64(AddrData))
+ return {};
+
+ Register N0 = AddrData.N0;
+ Register N2 = AddrData.N2;
+ Register N3 = AddrData.N3;
+ int64_t Offset = AddrData.Offset;
+
+ // VGPR pointer
+ Register VAddr;
+
+ // SGPR offset.
+ Register SOffset;
+
+ // Base pointer for the SRD.
+ Register SRDPtr;
+
+ if (N2) {
+ if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
+ assert(N3);
+ if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
+ // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
+ // addr64, and construct the default resource from a 0 address.
+ VAddr = N0;
+ } else {
+ SRDPtr = N3;
+ VAddr = N2;
+ }
+ } else {
+ // N2 is not divergent.
+ SRDPtr = N2;
+ VAddr = N3;
+ }
+ } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
+ // Use the default null pointer in the resource
+ VAddr = N0;
+ } else {
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ SRDPtr = N0;
+ }
- // TODO: Attempt to use addressing modes. We need to look back through regbank
- // copies to find a 64-bit SGPR base and VGPR offset.
+ MachineIRBuilder B(*Root.getParent());
+ Register RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
+ splitIllegalMUBUFOffset(B, SOffset, Offset);
// FIXME: Use defaulted operands for trailing 0s and remove from the complex
// pattern.
return {{
[=](MachineInstrBuilder &MIB) { // rsrc
- MIB.addReg(buildRSrc(MI, *MRI, TII, Register()));
+ MIB.addReg(RSrcReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // vaddr
+ MIB.addReg(VAddr);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
+ },
+ [=](MachineInstrBuilder &MIB) { // offset
+ MIB.addImm(Offset);
+ },
+ addZeroImm, // glc
+ addZeroImm, // slc
+ addZeroImm, // tfe
+ addZeroImm, // dlc
+ addZeroImm // swz
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
+ MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
+ if (shouldUseAddr64(AddrData))
+ return {};
+
+ // N0 -> offset, or
+ // (N0 + C1) -> offset
+ Register SRDPtr = AddrData.N0;
+ int64_t Offset = AddrData.Offset;
+ Register SOffset;
+
+ // TODO: Look through extensions for 32-bit soffset.
+ MachineIRBuilder B(*Root.getParent());
+
+ Register RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
+ splitIllegalMUBUFOffset(B, SOffset, Offset);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { // rsrc
+ MIB.addReg(RSrcReg);
+ },
+ [=](MachineInstrBuilder &MIB) { // soffset
+ if (SOffset)
+ MIB.addReg(SOffset);
+ else
+ MIB.addImm(0);
},
- [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); }, // vaddr
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // soffset
- addZeroImm, // offset
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
addZeroImm, // glc
addZeroImm, // slc
addZeroImm, // tfe
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 94019ddf8ff5..1c0db098922c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -180,9 +180,31 @@ class AMDGPUInstructionSelector : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
+ std::pair<Register, int64_t>
+ getPtrBaseWithConstantOffset(Register Root,
+ const MachineRegisterInfo &MRI) const;
+
+ // Parse out a chain of up to two g_ptr_add instructions.
+ // g_ptr_add (n0, _)
+ // g_ptr_add (n0, (n1 = g_ptr_add n2, n3))
+ struct MUBUFAddressData {
+ Register N0, N2, N3;
+ int64_t Offset = 0;
+ };
+
+ bool shouldUseAddr64(MUBUFAddressData AddrData) const;
+
+ void splitIllegalMUBUFOffset(MachineIRBuilder &B,
+ Register &SOffset, int64_t &ImmOffset) const;
+
+ MUBUFAddressData parseMUBUFAddress(Register Src) const;
+
InstructionSelector::ComplexRendererFns
selectMUBUFAddr64(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
+ selectMUBUFOffset(MachineOperand &Root) const;
+
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 81cfcd3bbc93..2937946bbf26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2497,6 +2497,22 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
+/// Return the mapping for a pointer arugment.
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
+ Register PtrReg) const {
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned Size = PtrTy.getSizeInBits();
+ if (Subtarget.useFlatForGlobal() ||
+ !SITargetLowering::isFlatGlobalAddrSpace(PtrTy.getAddressSpace()))
+ return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+
+ // If we're using MUBUF instructions for global memory, an SGPR base register
+ // is possible. Otherwise this needs to be a VGPR.
+ const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
+ return AMDGPU::getValueMapping(PtrBank->getID(), Size);
+}
+
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
@@ -2516,12 +2532,23 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI);
if (PtrBank == &AMDGPU::SGPRRegBank &&
- (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
- AS != AMDGPUAS::PRIVATE_ADDRESS) &&
- isScalarLoadLegal(MI)) {
- // We have a uniform instruction so we want to use an SMRD load
- ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
- PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
+ SITargetLowering::isFlatGlobalAddrSpace(AS)) {
+ if (isScalarLoadLegal(MI)) {
+ // We have a uniform instruction so we want to use an SMRD load
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
+ } else {
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+
+ // If we're using MUBUF instructions for global memory, an SGPR base
+ // register is possible. Otherwise this needs to be a VGPR.
+ unsigned PtrBankID = Subtarget.useFlatForGlobal() ?
+ AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
+
+ PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
+ ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID,
+ LoadTy);
+ }
} else {
ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
@@ -2945,21 +2972,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_STORE: {
assert(MI.getOperand(0).isReg());
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- // FIXME: We need to specify a
diff erent reg bank once scalar stores
- // are supported.
+
+ // FIXME: We need to specify a
diff erent reg bank once scalar stores are
+ // supported.
const ValueMapping *ValMapping =
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
- // FIXME: Depending on the type of store, the pointer could be in
- // the SGPR Reg bank.
- // FIXME: Pointer size should be based on the address space.
- const ValueMapping *PtrMapping =
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
-
OpdsMapping[0] = ValMapping;
- OpdsMapping[1] = PtrMapping;
+ OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
break;
}
-
case AMDGPU::G_ICMP: {
auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
@@ -3478,11 +3499,20 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
- case AMDGPU::G_ATOMIC_CMPXCHG:
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
case AMDGPU::G_AMDGPU_ATOMIC_INC:
case AMDGPU::G_AMDGPU_ATOMIC_DEC: {
- return getDefaultMappingAllVGPR(MI);
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
+ case AMDGPU::G_ATOMIC_CMPXCHG: {
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
+ OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ break;
}
case AMDGPU::G_BRCOND: {
unsigned Bank = getRegBankID(MI.getOperand(0).getReg(), MRI, *TRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 1ac7d3652a8b..e53b0b046d77 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -91,6 +91,9 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ const ValueMapping *getValueMappingForPtr(const MachineRegisterInfo &MRI,
+ Register Ptr) const;
+
const RegisterBankInfo::InstructionMapping &
getInstrMappingForLoad(const MachineInstr &MI) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
index ed7007fe5818..deb59ffa10c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-atomic-global.mir
@@ -17,22 +17,22 @@ body: |
; GFX6-LABEL: name: load_atomic_global_s32_seq_cst
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst
@@ -138,22 +138,22 @@ body: |
; GFX6-LABEL: name: load_atomic_global_s64_seq_cst
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst
@@ -337,19 +337,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -357,19 +357,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -423,42 +423,22 @@ body: |
; GFX6-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
index 3bbc8f0e016d..25e5790c06cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -22,22 +22,22 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_4
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_4
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_4
@@ -80,22 +80,22 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_2
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_2
@@ -138,22 +138,22 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1
@@ -196,22 +196,22 @@ body: |
; GFX6-LABEL: name: load_global_v2s32
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v2s32
@@ -254,22 +254,22 @@ body: |
; GFX6-LABEL: name: load_global_v3s32
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
; GFX7-LABEL: name: load_global_v3s32
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v3s32
@@ -312,22 +312,22 @@ body: |
; GFX6-LABEL: name: load_global_v4s32
; GFX6: liveins: $vgpr0_vgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-LABEL: name: load_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v4s32
@@ -992,42 +992,22 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1_gep_2047
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1092,42 +1072,22 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1_gep_2048
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1205,19 +1165,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7: liveins: $vgpr0_vgpr1
@@ -1225,19 +1185,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1305,19 +1265,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -1325,19 +1285,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1402,42 +1362,22 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1_gep_4095
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1512,42 +1452,24 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1_gep_4096
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1635,19 +1557,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7: liveins: $vgpr0_vgpr1
@@ -1655,19 +1577,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1745,19 +1667,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7: liveins: $vgpr0_vgpr1
@@ -1765,19 +1687,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1852,42 +1774,24 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1_gep_8191
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -1972,42 +1876,24 @@ body: |
; GFX6-LABEL: name: load_global_s32_from_1_gep_8192
; GFX6: liveins: $vgpr0_vgpr1
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -2095,19 +1981,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7: liveins: $vgpr0_vgpr1
@@ -2115,19 +2001,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -2215,19 +2101,19 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7: liveins: $vgpr0_vgpr1
@@ -2235,19 +2121,19 @@ body: |
; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
- ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
+ ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
+ ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+ ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
+ ; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
index 04735c7c5ac9..49a22c1ca01e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -19,23 +19,23 @@ body: |
; GFX6-LABEL: name: store_global_s32_to_4
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
@@ -76,23 +76,23 @@ body: |
; GFX6-LABEL: name: store_global_s32_to_2
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_2
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
@@ -133,23 +133,23 @@ body: |
; GFX6-LABEL: name: store_global_s32_to_1
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_1
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
@@ -331,23 +331,23 @@ body: |
; GFX6-LABEL: name: store_global_v2s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-LABEL: name: store_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -388,23 +388,23 @@ body: |
; GFX6-LABEL: name: store_global_v3s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX6: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7-LABEL: name: store_global_v3s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
@@ -445,23 +445,23 @@ body: |
; GFX6-LABEL: name: store_global_v4s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-LABEL: name: store_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+ ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
- ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
@@ -1025,42 +1025,22 @@ body: |
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
- ; GFX6: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_gep_2047
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
- ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
- ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+ ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
- ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
- ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
- ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
- ; GFX7: %14:vgpr_32, dead %16:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
- ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %14, %subreg.sub1
- ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
+ ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
index 07f45eaf2b8a..4205f8ab7f8c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
new file mode 100644
index 000000000000..f721baf78351
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
@@ -0,0 +1,1097 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+
+; Test end to end matching of addressing modes when MUBUF is used for
+; global memory.
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+ store i32 0, i32 addrspace(1)* %ptr
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], s4
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0
+; GFX6-NEXT: s_mov_b32 s5, 4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967296:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s4, 0
+; GFX7-NEXT: s_mov_b32 s5, 4
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 4
+; GFX6-NEXT: s_mov_b32 s5, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4294967297:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s4, 4
+; GFX7-NEXT: s_mov_b32 s5, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x4000
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], s4
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4096:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x4000
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4095:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4095:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, 0
+; GFX6-NEXT: s_mov_b32 s1, 4
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, s0
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967296:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, 0
+; GFX7-NEXT: s_mov_b32 s1, 4
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s0
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, 4
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s1, s0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, 4
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s1, s0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: s_movk_i32 s4, 0x4000
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4096:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: s_movk_i32 s4, 0x4000
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v0, 4, s4
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mul_i32 s3, s4, 4
+; GFX6-NEXT: s_mul_i32 s6, s4, 0
+; GFX6-NEXT: s_mul_i32 s4, s5, 4
+; GFX6-NEXT: s_add_u32 s4, s6, s4
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, s4, v0
+; GFX6-NEXT: v_mov_b32_e32 v0, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v0, 4, s4
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mul_i32 s3, s4, 4
+; GFX7-NEXT: s_mul_i32 s6, s4, 0
+; GFX7-NEXT: s_mul_i32 s4, s5, 4
+; GFX7-NEXT: s_add_u32 s4, s6, s4
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, s4, v0
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX6-NEXT: s_mul_i32 s2, s0, 4
+; GFX6-NEXT: s_mul_i32 s3, s0, 0
+; GFX6-NEXT: s_mul_i32 s0, s1, 4
+; GFX6-NEXT: s_add_u32 s0, s3, s0
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, s0, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT: s_mov_b32 s6, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX7-NEXT: s_mul_i32 s2, s0, 4
+; GFX7-NEXT: s_mul_i32 s3, s0, 0
+; GFX7-NEXT: s_mul_i32 s0, s1, 4
+; GFX7-NEXT: s_add_u32 s0, s3, s0
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s0, v2
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX6-NEXT: s_mul_i32 s2, s0, 4
+; GFX6-NEXT: s_mul_i32 s3, s0, 0
+; GFX6-NEXT: s_mul_i32 s0, s1, 4
+; GFX6-NEXT: s_add_u32 s0, s3, s0
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, s0, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT: s_mov_b32 s6, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: s_mov_b64 s[4:5], 0
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:1024
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX7-NEXT: s_mul_i32 s2, s0, 4
+; GFX7-NEXT: s_mul_i32 s3, s0, 0
+; GFX7-NEXT: s_mul_i32 s0, s1, 4
+; GFX7-NEXT: s_add_u32 s0, s3, s0
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s0, v2
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, s2, v0
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:1024
+; GFX7-NEXT: s_endpgm
+ %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset
+ %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256
+ store i32 0, i32 addrspace(1)* %gep1
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s1, 0
+; GFX6-NEXT: s_movk_i32 s0, 0x400
+; GFX6-NEXT: v_mov_b32_e32 v3, s1
+; GFX6-NEXT: v_mov_b32_e32 v2, s0
+; GFX6-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x200000
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_mul_hi_u32 v2, 4, s2
+; GFX6-NEXT: s_mul_i32 s0, s2, 4
+; GFX6-NEXT: s_mul_i32 s4, s2, 0
+; GFX6-NEXT: s_mul_i32 s2, s3, 4
+; GFX6-NEXT: s_add_u32 s2, s4, s2
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, s2, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT: s_mov_b32 s2, s1
+; GFX6-NEXT: v_mov_b32_e32 v2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s1, 0
+; GFX7-NEXT: s_movk_i32 s0, 0x400
+; GFX7-NEXT: v_mov_b32_e32 v3, s1
+; GFX7-NEXT: v_mov_b32_e32 v2, s0
+; GFX7-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x200000
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_mul_hi_u32 v2, 4, s2
+; GFX7-NEXT: s_mul_i32 s0, s2, 4
+; GFX7-NEXT: s_mul_i32 s4, s2, 0
+; GFX7-NEXT: s_mul_i32 s2, s3, 4
+; GFX7-NEXT: s_add_u32 s2, s4, s2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s2, v2
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX7-NEXT: s_mov_b32 s2, s1
+; GFX7-NEXT: v_mov_b32_e32 v2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256
+ %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset
+ store i32 0, i32 addrspace(1)* %gep1
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX6-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX6-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX6-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX7-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX7-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX7-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
+ store i32 0, i32 addrspace(1)* %gep
+ ret void
+}
+
+define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX6-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX6-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX6-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_store_dword v0, v[1:2], s[0:3], s4 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX7-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX7-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX7-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_store_dword v0, v[1:2], s[0:3], s4 addr64
+; GFX7-NEXT: s_endpgm
+ %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset
+ %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095
+ store i32 0, i32 addrspace(1)* %gep1
+ ret void
+}
+define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) {
+; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX6-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX6-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX6-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: s_mov_b32 s6, 0
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX6-NEXT: s_addc_u32 s5, s3, 0
+; GFX6-NEXT: v_mov_b32_e32 v0, 0
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; GFX6-NEXT: s_endpgm
+;
+; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX7-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX7-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX7-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX7-NEXT: s_addc_u32 s5, s3, 0
+; GFX7-NEXT: v_mov_b32_e32 v0, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
+; GFX7-NEXT: s_endpgm
+ %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095
+ %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset
+ store i32 0, i32 addrspace(1)* %gep1
+ ret void
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], 0
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %val = load float, float addrspace(1)* %ptr
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], s4
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 0
+; GFX6-NEXT: s_mov_b32 s5, 4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967296:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s4, 0
+; GFX7-NEXT: s_mov_b32 s5, 4
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s4, 4
+; GFX6-NEXT: s_mov_b32 s5, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4294967297:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s4, 4
+; GFX7-NEXT: s_mov_b32 s5, s4
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x4000
+; GFX6-NEXT: buffer_load_dword v0, off, s[0:3], s4
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4096:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x4000
+; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4095:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, 0
+; GFX6-NEXT: s_mov_b32 s1, 4
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, s0
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967296:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, 0
+; GFX7-NEXT: s_mov_b32 s1, 4
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s0
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, 4
+; GFX6-NEXT: s_mov_b32 s1, s0
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4294967297:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, 4
+; GFX7-NEXT: s_mov_b32 s1, s0
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: s_movk_i32 s4, 0x4000
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_offset4096:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: s_movk_i32 s4, 0x4000
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v0, 4, s2
+; GFX6-NEXT: s_mul_i32 s4, s2, 0
+; GFX6-NEXT: s_mul_i32 s3, s3, 4
+; GFX6-NEXT: s_add_u32 s3, s4, s3
+; GFX6-NEXT: s_mul_i32 s2, s2, 4
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, s3, v0
+; GFX6-NEXT: v_mov_b32_e32 v0, s2
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_sgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v0, 4, s2
+; GFX7-NEXT: s_mul_i32 s4, s2, 0
+; GFX7-NEXT: s_mul_i32 s3, s3, 4
+; GFX7-NEXT: s_add_u32 s3, s4, s3
+; GFX7-NEXT: s_mul_i32 s2, s2, 4
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, s3, v0
+; GFX7-NEXT: v_mov_b32_e32 v0, s2
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX6-NEXT: s_mul_i32 s2, s0, 0
+; GFX6-NEXT: s_mul_i32 s1, s1, 4
+; GFX6-NEXT: s_add_u32 s1, s2, s1
+; GFX6-NEXT: s_mul_i32 s0, s0, 4
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, s1, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX7-NEXT: s_mul_i32 s2, s0, 0
+; GFX7-NEXT: s_mul_i32 s1, s1, 4
+; GFX7-NEXT: s_add_u32 s1, s2, s1
+; GFX7-NEXT: s_mul_i32 s0, s0, 4
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s1, v2
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX6-NEXT: s_mul_i32 s2, s0, 0
+; GFX6-NEXT: s_mul_i32 s1, s1, 4
+; GFX6-NEXT: s_add_u32 s1, s2, s1
+; GFX6-NEXT: s_mul_i32 s0, s0, 4
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, s1, v2
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v2, 4, s0
+; GFX7-NEXT: s_mul_i32 s2, s0, 0
+; GFX7-NEXT: s_mul_i32 s1, s1, 4
+; GFX7-NEXT: s_add_u32 s1, s2, s1
+; GFX7-NEXT: s_mul_i32 s0, s0, 4
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s1, v2
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset
+ %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256
+ %val = load float, float addrspace(1)* %gep1
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) {
+; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX6-NEXT: v_mul_hi_u32 v4, 4, s0
+; GFX6-NEXT: s_movk_i32 s4, 0x400
+; GFX6-NEXT: s_mov_b32 s5, 0
+; GFX6-NEXT: v_mov_b32_e32 v2, s4
+; GFX6-NEXT: s_mul_i32 s2, s0, 0
+; GFX6-NEXT: s_mul_i32 s1, s1, 4
+; GFX6-NEXT: s_add_u32 s1, s2, s1
+; GFX6-NEXT: v_mov_b32_e32 v3, s5
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX6-NEXT: s_mul_i32 s0, s0, 4
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, s1, v4
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, s5
+; GFX6-NEXT: s_mov_b64 s[0:1], 0
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000
+; GFX7-NEXT: v_mul_hi_u32 v4, 4, s0
+; GFX7-NEXT: s_movk_i32 s4, 0x400
+; GFX7-NEXT: s_mov_b32 s5, 0
+; GFX7-NEXT: v_mov_b32_e32 v2, s4
+; GFX7-NEXT: s_mul_i32 s2, s0, 0
+; GFX7-NEXT: s_mul_i32 s1, s1, 4
+; GFX7-NEXT: s_add_u32 s1, s2, s1
+; GFX7-NEXT: v_mov_b32_e32 v3, s5
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mul_i32 s0, s0, 4
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, s1, v4
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, s0, v0
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_mov_b32 s2, s5
+; GFX7-NEXT: s_mov_b64 s[0:1], 0
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256
+ %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset
+ %val = load float, float addrspace(1)* %gep1
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX6-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX6-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX6-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX6-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_load_dword v0, v[1:2], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_mul_lo_u32 v3, 4, v1
+; GFX7-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX7-NEXT: v_mul_lo_u32 v1, 4, v0
+; GFX7-NEXT: v_mul_hi_u32 v0, 4, v0
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v3
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v0
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_load_dword v0, v[1:2], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
+ %val = load float, float addrspace(1)* %gep
+ ret float %val
+}
+
+define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX6-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX6-NEXT: v_mul_lo_u32 v1, 4, v1
+; GFX6-NEXT: v_mul_hi_u32 v3, 4, v0
+; GFX6-NEXT: v_mul_lo_u32 v0, 4, v0
+; GFX6-NEXT: s_mov_b32 s0, s2
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX6-NEXT: s_mov_b32 s1, s3
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX7-NEXT: v_mul_lo_u32 v1, 4, v1
+; GFX7-NEXT: v_mul_hi_u32 v3, 4, v0
+; GFX7-NEXT: v_mul_lo_u32 v0, 4, v0
+; GFX7-NEXT: s_mov_b32 s0, s2
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX7-NEXT: s_mov_b32 s1, s3
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: s_movk_i32 s4, 0x3ffc
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset
+ %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095
+ %val = load float, float addrspace(1)* %gep1
+ ret float %val
+}
+define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) {
+; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX6-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX6-NEXT: v_mul_lo_u32 v1, 4, v1
+; GFX6-NEXT: v_mul_hi_u32 v3, 4, v0
+; GFX6-NEXT: v_mul_lo_u32 v0, 4, v0
+; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX6-NEXT: s_addc_u32 s1, s3, 0
+; GFX6-NEXT: s_mov_b32 s2, 0
+; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX6-NEXT: s_waitcnt vmcnt(0)
+; GFX6-NEXT: ; return to shader part epilog
+;
+; GFX7-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX7-NEXT: v_mul_lo_u32 v2, 0, v0
+; GFX7-NEXT: v_mul_lo_u32 v1, 4, v1
+; GFX7-NEXT: v_mul_hi_u32 v3, 4, v0
+; GFX7-NEXT: v_mul_lo_u32 v0, 4, v0
+; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v2, v1
+; GFX7-NEXT: s_addc_u32 s1, s3, 0
+; GFX7-NEXT: s_mov_b32 s2, 0
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: ; return to shader part epilog
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095
+ %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %voffset
+ %val = load float, float addrspace(1)* %gep1
+ ret float %val
+}
+
+; define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) {
+; %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095
+; %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst
+; %cast = bitcast i32 %result to float
+; ret float %cast
+; }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
index d4a3f4025b37..47cbb9372d3a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
index e5d67a3f8874..b673d22c3820 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
@@ -24,8 +24,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[INT]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
@@ -55,8 +54,7 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom "TargetCustom8")
- ; CHECK: [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[INT]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
@@ -121,8 +119,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
- ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[INT]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
@@ -177,8 +174,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
- ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[INT]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
@@ -255,8 +251,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
- ; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
- ; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
+ ; CHECK: G_STORE [[INT]](<4 x s32>), [[DEF]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir
index 70857f8a91c7..906bf9456b16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sextload.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
---
name: sextload_constant_i8_to_i32_uniform
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
index 3ace0d3610db..f9cf086820f4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zextload.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
-# RUN: llc -march=amdgcn -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s
---
name: zextload_constant_i8_to_i32_uniform
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index 120bbfaba24d..eb0d73c9143b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s
--- |
define amdgpu_kernel void @load_constant(i32 addrspace(4)* %ptr0) {
More information about the llvm-commits
mailing list