[llvm] r367498 - AMDGPU/GlobalISel: Select local loads
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 31 17:53:38 PDT 2019
Author: arsenm
Date: Wed Jul 31 17:53:38 2019
New Revision: 367498
URL: http://llvm.org/viewvc/llvm-project?rev=367498&view=rev
Log:
AMDGPU/GlobalISel: Select local loads
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td?rev=367498&r1=367497&r2=367498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUGISel.td Wed Jul 31 17:53:38 2019
@@ -64,6 +64,18 @@ def gi_mubuf_scratch_offen :
GIComplexOperandMatcher<s32, "selectMUBUFScratchOffen">,
GIComplexPatternEquiv<MUBUFScratchOffen>;
+def gi_ds_1addr_1offset :
+ GIComplexOperandMatcher<s32, "selectDS1Addr1Offset">,
+ GIComplexPatternEquiv<DS1Addr1Offset>;
+
+
+// Separate load nodes are defined to glue m0 initialization in
+// SelectionDAG. The GISel selector can just insert m0 initialization
+// directly before selecting a glue-less load, so hide this
+// distinction.
+def : GINodeEquiv<G_LOAD, AMDGPUld_glue>;
+
+
class GISelSop2Pat <
SDPatternOperator node,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=367498&r1=367497&r2=367498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Wed Jul 31 17:53:38 2019
@@ -1243,10 +1243,22 @@ bool AMDGPUInstructionSelector::hasVgprP
return false;
}
-bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
- // TODO: Can/should we insert m0 initialization here for DS instructions and
- // call the normal selector?
- return false;
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I,
+ CodeGenCoverage &CoverageInfo) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
+ STI.ldsRequiresM0Init()) {
+ // If DS instructions require M0 initialization, insert it before selecting.
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addImm(-1);
+ }
+ // NOTE(review): the M0 write above is left in place even if selectImpl fails.
+ return selectImpl(I, CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
@@ -1364,7 +1376,7 @@ bool AMDGPUInstructionSelector::select(M
return true;
return selectImpl(I, CoverageInfo);
case TargetOpcode::G_LOAD:
- return selectImpl(I, CoverageInfo);
+ return selectG_LOAD(I, CoverageInfo);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
@@ -1698,6 +1710,22 @@ AMDGPUInstructionSelector::selectMUBUFSc
}}};
}
+bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
+ const MachineOperand &Base,
+ int64_t Offset,
+ unsigned OffsetBits) const {
+ if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
+ (OffsetBits == 8 && !isUInt<8>(Offset)))
+ return false;
+ // NOTE(review): OffsetBits values other than 8 or 16 skip the range check.
+ if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
+ return true;
+
+ // On Southern Islands, instructions with a negative base value and an offset
+ // don't seem to work.
+ return signBitIsZero(Base, MRI);
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
@@ -1726,3 +1754,49 @@ AMDGPUInstructionSelector::selectMUBUFSc
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
}};
}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+ MachineInstr *MI = Root.getParent();
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
+ if (!RootDef) {
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ }};
+ }
+
+ int64_t ConstAddr = 0;
+ if (isBaseWithConstantOffset(Root, MRI)) {
+ const MachineOperand &LHS = RootDef->getOperand(1);
+ const MachineOperand &RHS = RootDef->getOperand(2);
+ const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
+ const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
+ if (LHSDef && RHSDef) {
+ int64_t PossibleOffset =
+ RHSDef->getOperand(1).getCImm()->getSExtValue();
+ if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) {
+ // (add n0, c0)
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
+ }};
+ }
+ }
+ } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
+ // TODO: G_SUB-based addresses are not folded yet; this branch is empty and
+ // falls through to the zero-offset default at the end of the function.
+
+ } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) {
+ // TODO: constant addresses are matched here but ConstAddr is not used yet.
+
+ }
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ }};
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h?rev=367498&r1=367497&r2=367498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h Wed Jul 31 17:53:38 2019
@@ -90,7 +90,7 @@ private:
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
- bool selectG_LOAD(MachineInstr &I) const;
+ bool selectG_LOAD(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
bool selectG_BRCOND(MachineInstr &I) const;
@@ -133,6 +133,13 @@ private:
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
+ bool isDSOffsetLegal(const MachineRegisterInfo &MRI,
+ const MachineOperand &Base,
+ int64_t Offset, unsigned OffsetBits) const;
+
+ InstructionSelector::ComplexRendererFns
+ selectDS1Addr1Offset(MachineOperand &Root) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=367498&r1=367497&r2=367498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Wed Jul 31 17:53:38 2019
@@ -480,11 +480,13 @@ def atomic_store_local : LocalStore <ato
def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
let IsLoad = 1;
+ let IsNonExtLoad = 1;
let MinAlignment = 8;
}
def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> {
let IsLoad = 1;
+ let IsNonExtLoad = 1;
let MinAlignment = 16;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=367498&r1=367497&r2=367498&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Jul 31 17:53:38 2019
@@ -328,13 +328,13 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD:
>;
def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
- let IsUnindexed = 1;
let IsLoad = 1;
+ let IsUnindexed = 1;
}
def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
- let IsNonExtLoad = 1;
let IsLoad = 1;
+ let IsNonExtLoad = 1;
}
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
@@ -396,7 +396,9 @@ def sextloadi16_glue : PatFrag<(ops node
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
-def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)>;
+def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
+ let IsNonExtLoad = 1;
+}
let MemoryVT = i8 in {
def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
@@ -412,9 +414,11 @@ def zextloadi16_local_m0 : PatFrag<(ops
def load_align8_local_m0 : LoadFrag <load_glue>, LocalAddress {
let MinAlignment = 8;
+ let IsNonExtLoad = 1;
}
def load_align16_local_m0 : LoadFrag <load_glue>, LocalAddress {
let MinAlignment = 16;
+ let IsNonExtLoad = 1;
}
} // End IsLoad = 1
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir?rev=367498&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir Wed Jul 31 17:53:38 2019
@@ -0,0 +1,906 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+
+
+---
+
+name: load_local_s32_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]]
+ ; GFX7-LABEL: name: load_local_s32_from_4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]]
+ ; GFX9-LABEL: name: load_local_s32_from_4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_s32_from_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_2
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]]
+ ; GFX7-LABEL: name: load_local_s32_from_2
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]]
+ ; GFX9-LABEL: name: load_local_s32_from_2
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_s32_from_1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ scratchWaveOffsetReg: $sgpr4
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ; GFX6-LABEL: name: load_local_v2s32
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ; GFX7-LABEL: name: load_local_v2s32
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]]
+ ; GFX9-LABEL: name: load_local_v2s32
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s32_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]]
+ ; GFX6-LABEL: name: load_local_v2s32_align4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX7-LABEL: name: load_local_v2s32_align4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ ; GFX9-LABEL: name: load_local_v2s32_align4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v3s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v3s32
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ; GFX7-LABEL: name: load_local_v3s32
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ ; GFX9-LABEL: name: load_local_v3s32
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_local_v4s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v4s32
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ; GFX7-LABEL: name: load_local_v4s32
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ ; GFX9-LABEL: name: load_local_v4s32
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s64
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7-LABEL: name: load_local_s64
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX9-LABEL: name: load_local_s64
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_s64_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s64_align4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX7-LABEL: name: load_local_s64_align4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ ; GFX9-LABEL: name: load_local_s64_align4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2s64
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ; GFX7-LABEL: name: load_local_v2s64
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ ; GFX9-LABEL: name: load_local_v2s64
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_v2p1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2p1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+ ; GFX7-LABEL: name: load_local_v2p1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+ ; GFX9-LABEL: name: load_local_v2p1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_s96
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s96
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+ ; GFX7-LABEL: name: load_local_s96
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+ ; GFX9-LABEL: name: load_local_s96
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+
+...
+
+---
+
+name: load_local_s128
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s128
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+ ; GFX7-LABEL: name: load_local_s128
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+ ; GFX9-LABEL: name: load_local_s128
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+---
+
+name: load_local_p3_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p3_from_4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX7-LABEL: name: load_local_p3_from_4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX9-LABEL: name: load_local_p3_from_4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_p5_from_4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p5_from_4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX7-LABEL: name: load_local_p5_from_4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[LOAD]](p3)
+ ; GFX9-LABEL: name: load_local_p5_from_4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[LOAD]](p3)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_p1_align8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p1_align8
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX7-LABEL: name: load_local_p1_align8
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX9-LABEL: name: load_local_p1_align8
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_p1_align4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p1_align4
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX7-LABEL: name: load_local_p1_align4
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ ; GFX9-LABEL: name: load_local_p1_align4
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_p999_from_8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_p999_from_8
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ; GFX7-LABEL: name: load_local_p999_from_8
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ ; GFX9-LABEL: name: load_local_p999_from_8
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2p3
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2p3
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX7-LABEL: name: load_local_v2p3
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ ; GFX9-LABEL: name: load_local_v2p3
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+---
+
+name: load_local_v2s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v2s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; GFX7-LABEL: name: load_local_v2s16
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ ; GFX9-LABEL: name: load_local_v2s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3)
+ $vgpr0 = COPY %1
+
+...
+
+---
+
+name: load_local_v4s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v4s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ; GFX7-LABEL: name: load_local_v4s16
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ ; GFX9-LABEL: name: load_local_v4s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3)
+ $vgpr0_vgpr1 = COPY %1
+
+...
+
+# ---
+
+# name: load_local_v6s16
+# legalized: true
+# regBankSelected: true
+# tracksRegLiveness: true
+# machineFunctionInfo:
+# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+# scratchWaveOffsetReg: $sgpr4
+# stackPtrOffsetReg: $sgpr32
+
+# body: |
+# bb.0:
+# liveins: $vgpr0
+
+# %0:vgpr(p3) = COPY $vgpr0
+# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
+# $vgpr0_vgpr1_vgpr2 = COPY %1
+
+# ...
+
+---
+
+name: load_local_v8s16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_v8s16
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+ ; GFX7-LABEL: name: load_local_v8s16
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+ ; GFX9-LABEL: name: load_local_v8s16
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+ ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+
+...
+
+################################################################################
+### Stress addressing modes
+################################################################################
+
+---
+
+name: load_local_s32_from_1_gep_65535
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 65535
+ %2:vgpr(p3) = G_GEP %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_local_s32_from_1_gep_65536
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+ ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 65536
+ %2:vgpr(p3) = G_GEP %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
+
+---
+
+name: load_local_s32_from_1_gep_m1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1
+ ; GFX6: liveins: $vgpr0
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX6: $m0 = S_MOV_B32 -1
+ ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1
+ ; GFX7: liveins: $vgpr0
+ ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX7: $m0 = S_MOV_B32 -1
+ ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+ ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1
+ ; GFX9: liveins: $vgpr0
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+ ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3)
+ ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = G_CONSTANT i32 -1
+ %2:vgpr(p3) = G_GEP %0, %1
+ %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3)
+ $vgpr0 = COPY %3
+
+...
More information about the llvm-commits mailing list