[llvm] r287935 - Revert "AMDGPU: Fix MMO when splitting spill"
Marek Olsak via llvm-commits
Author: mareko
Date: Fri Nov 25 10:03:27 2016
New Revision: 287935
URL: http://llvm.org/viewvc/llvm-project?rev=287935&view=rev
Log:
Revert "AMDGPU: Fix MMO when splitting spill"
This reverts commit 79d4f8b8b1ce430c3d5dac4fc72a9eebaed24fe1.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
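For context before the diff: the reverted change (79d4f8b8) taught buildSpillLoadStore to synthesize one 4-byte MachineMemOperand per sub-register when splitting a multi-dword spill, instead of reusing the instruction's original memory operands for every split access. A minimal sketch of the removed logic, reassembled from the deleted lines in the first hunk below:

  const unsigned EltSize = 4; // each split access moves one dword
  for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
    // Derive per-element pointer info and a 4-byte MMO at the right
    // offset and alignment within the original spill slot.
    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
    MachineMemOperand *NewMMO
      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
                                 EltSize, MinAlign(Align, EltSize * i));
    // ... emit one BUFFER_{LOAD,STORE}_DWORD_OFFSET carrying NewMMO ...
  }

This revert restores the pre-79d4f8b8 behavior, which the hunks below reintroduce verbatim.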
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=287935&r1=287934&r2=287935&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Fri Nov 25 10:03:27 2016
@@ -407,36 +407,28 @@ static bool buildMUBUFOffsetLoadStore(co
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
- int Index,
- unsigned ValueReg,
- bool IsKill,
+ const MachineOperand *SrcDst,
unsigned ScratchRsrcReg,
- unsigned ScratchOffsetReg,
- int64_t InstOffset,
- MachineMemOperand *MMO,
+ unsigned ScratchOffset,
+ int64_t Offset,
RegScavenger *RS) const {
+ unsigned Value = SrcDst->getReg();
+ bool IsKill = SrcDst->isKill();
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
- const MachineFrameInfo &MFI = MF->getFrameInfo();
- const MCInstrDesc &Desc = TII->get(LoadStoreOp);
- const DebugLoc &DL = MI->getDebugLoc();
- bool IsStore = Desc.mayStore();
+ DebugLoc DL = MI->getDebugLoc();
+ bool IsStore = MI->mayStore();
bool RanOutOfSGPRs = false;
bool Scavenged = false;
- unsigned SOffset = ScratchOffsetReg;
+ unsigned SOffset = ScratchOffset;
+ unsigned OriginalImmOffset = Offset;
- const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
- unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned Size = NumSubRegs * 4;
- int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
- const int64_t OriginalImmOffset = Offset;
-
- unsigned Align = MFI.getObjectAlignment(Index);
- const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
if (!isUInt<12>(Offset + Size)) {
SOffset = AMDGPU::NoRegister;
@@ -455,23 +447,19 @@ void SIRegisterInfo::buildSpillLoadStore
// subtract the offset after the spill to return ScratchOffset to its
// original value.
RanOutOfSGPRs = true;
- SOffset = ScratchOffsetReg;
+ SOffset = ScratchOffset;
} else {
Scavenged = true;
}
-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
- .addReg(ScratchOffsetReg)
- .addImm(Offset);
-
+ .addReg(ScratchOffset)
+ .addImm(Offset);
Offset = 0;
}
- const unsigned EltSize = 4;
-
- for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
+ for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
unsigned SubReg = NumSubRegs == 1 ?
- ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
+ Value : getSubReg(Value, getSubRegFromChannel(i));
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
@@ -481,12 +469,7 @@ void SIRegisterInfo::buildSpillLoadStore
SrcDstRegState |= getKillRegState(IsKill);
}
- MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
- MachineMemOperand *NewMMO
- = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
- EltSize, MinAlign(Align, EltSize * i));
-
- BuildMI(*MBB, MI, DL, Desc)
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(!IsStore))
.addReg(ScratchRsrcReg)
.addReg(SOffset, SOffsetRegState)
@@ -494,15 +477,14 @@ void SIRegisterInfo::buildSpillLoadStore
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
- .addMemOperand(NewMMO)
- .addReg(ValueReg, RegState::Implicit | SrcDstRegState);
+ .addReg(Value, RegState::Implicit | SrcDstRegState)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
}
-
if (RanOutOfSGPRs) {
// Subtract the offset we added to the ScratchOffset register.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
- .addReg(ScratchOffsetReg)
- .addImm(OriginalImmOffset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset)
+ .addReg(ScratchOffset)
+ .addImm(OriginalImmOffset);
}
}
@@ -525,8 +507,6 @@ void SIRegisterInfo::spillSGPR(MachineBa
bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
- const unsigned EltSize = 4;
-
// SubReg carries the "Kill" flag when SubReg == SuperReg.
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
@@ -548,12 +528,13 @@ void SIRegisterInfo::spillSGPR(MachineBa
}
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
+ unsigned Size = FrameInfo.getObjectSize(Index);
unsigned Align = FrameInfo.getObjectAlignment(Index);
MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ = MachinePointerInfo::getFixedStack(*MF, Index);
MachineMemOperand *MMO
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- EltSize, MinAlign(Align, EltSize * i));
+ Size, Align);
unsigned OffsetReg = AMDGPU::M0;
// Add i * 4 wave offset.
@@ -626,12 +607,13 @@ void SIRegisterInfo::spillSGPR(MachineBa
Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
}
+ unsigned Size = FrameInfo.getObjectSize(Index);
unsigned Align = FrameInfo.getObjectAlignment(Index);
MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ = MachinePointerInfo::getFixedStack(*MF, Index);
MachineMemOperand *MMO
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- EltSize, MinAlign(Align, EltSize * i));
+ Size, Align);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
.addReg(TmpReg, RegState::Kill) // src
.addFrameIndex(Index) // vaddr
@@ -672,19 +654,18 @@ void SIRegisterInfo::restoreSGPR(Machine
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
- const unsigned EltSize = 4;
-
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = NumSubRegs == 1 ?
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
if (SpillToSMEM) {
+ unsigned Size = FrameInfo.getObjectSize(Index);
unsigned Align = FrameInfo.getObjectAlignment(Index);
MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ = MachinePointerInfo::getFixedStack(*MF, Index);
MachineMemOperand *MMO
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
- EltSize, MinAlign(Align, EltSize * i));
+ Size, Align);
unsigned OffsetReg = AMDGPU::M0;
@@ -722,15 +703,16 @@ void SIRegisterInfo::restoreSGPR(Machine
} else {
// Restore SGPR from a stack slot.
// FIXME: We should use S_LOAD_DWORD here for VI.
+
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned Align = FrameInfo.getObjectAlignment(Index);
+ unsigned Size = FrameInfo.getObjectSize(Index);
MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ = MachinePointerInfo::getFixedStack(*MF, Index);
- MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
- MachineMemOperand::MOLoad, EltSize,
- MinAlign(Align, EltSize * i));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, Size, Align);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
.addFrameIndex(Index) // vaddr
@@ -738,7 +720,8 @@ void SIRegisterInfo::restoreSGPR(Machine
.addReg(MFI->getScratchWaveOffsetReg()) // soffset
.addImm(i * 4) // offset
.addMemOperand(MMO);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
+ BuildMI(*MBB, MI, DL,
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpReg, RegState::Kill)
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
}
@@ -794,38 +777,28 @@ void SIRegisterInfo::eliminateFrameIndex
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE: {
- const MachineOperand *VData = TII->getNamedOperand(*MI,
- AMDGPU::OpName::vdata);
+ case AMDGPU::SI_SPILL_V32_SAVE:
buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
- Index,
- VData->getReg(), VData->isKill(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
- *MI->memoperands_begin(),
- RS);
+ FrameInfo.getObjectOffset(Index) +
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
MI->eraseFromParent();
break;
- }
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: {
- const MachineOperand *VData = TII->getNamedOperand(*MI,
- AMDGPU::OpName::vdata);
-
buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
- Index,
- VData->getReg(), VData->isKill(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
- *MI->memoperands_begin(),
- RS);
+ FrameInfo.getObjectOffset(Index) +
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
MI->eraseFromParent();
break;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=287935&r1=287934&r2=287935&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Fri Nov 25 10:03:27 2016
@@ -253,14 +253,9 @@ public:
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
- unsigned LoadStoreOp,
- int Index,
- unsigned ValueReg,
- bool ValueIsKill,
- unsigned ScratchRsrcReg,
- unsigned ScratchOffsetReg,
- int64_t InstrOffset,
- MachineMemOperand *MMO,
+ unsigned LoadStoreOp, const MachineOperand *SrcDst,
+ unsigned ScratchRsrcReg, unsigned ScratchOffset,
+ int64_t Offset,
RegScavenger *RS) const;
};
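Reassembled from the '+' lines above, the declaration after this revert reads:

  void buildSpillLoadStore(MachineBasicBlock::iterator MI,
                           unsigned LoadStoreOp, const MachineOperand *SrcDst,
                           unsigned ScratchRsrcReg, unsigned ScratchOffset,
                           int64_t Offset,
                           RegScavenger *RS) const;

Callers in eliminateFrameIndex (see the SIRegisterInfo.cpp hunks above) now pass the vdata operand directly and fold FrameInfo.getObjectOffset(Index) into Offset themselves, rather than handing the helper a frame index, an explicit ValueReg/IsKill pair, and a MachineMemOperand.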
Modified: llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll?rev=287935&r1=287934&r2=287935&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll Fri Nov 25 10:03:27 2016
@@ -26,9 +26,9 @@
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 8-byte Folded Spill
; Spill load
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill
@@ -55,11 +55,11 @@
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
@@ -108,9 +108,9 @@ endif:
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:20 ; 8-byte Folded Spill
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
@@ -133,11 +133,11 @@ endif:
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:20 ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
@@ -187,9 +187,9 @@ end:
; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[SAVEEXEC_LO_OFFSET:[0-9]+]] ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 8-byte Folded Spill
; GCN: s_mov_b64 exec, [[CMP0]]
; GCN: s_waitcnt vmcnt(0) expcnt(0)
@@ -208,7 +208,7 @@ end:
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET]] ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC_HI]]
@@ -224,9 +224,9 @@ end:
; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_LO:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_LO]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET:[0-9]+]] ; 8-byte Folded Spill
; VMEM: v_mov_b32_e32 v[[FLOW_V_SAVEEXEC_HI:[0-9]+]], s[[FLOW_S_RELOAD_SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET:[0-9]+]] ; 8-byte Folded Spill
; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}
@@ -255,11 +255,11 @@ end:
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_LO_OFFSET]] ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 4-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:[[FLOW_SAVEEXEC_HI_OFFSET]] ; 8-byte Folded Reload
; VMEM: s_waitcnt vmcnt(0)
; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
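The comment changes in this test, and in the two tests below, follow from the revert: the MachineMemOperand attached to a split SGPR spill is again built from the whole stack object rather than from a single 4-byte element, e.g.

  MachineMemOperand *MMO
    = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                               FrameInfo.getObjectSize(Index),      // whole slot
                               FrameInfo.getObjectAlignment(Index));

so the '; N-byte Folded Spill' annotations report the MMO's size, i.e. the full slot (8 bytes for the 64-bit saved exec mask here, 16 bytes in si-spill-sgpr-stack.ll), even though each buffer_store_dword/buffer_load_dword still moves only 4 bytes. That per-access imprecision is what the reverted commit had set out to fix.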
Modified: llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll?rev=287935&r1=287934&r2=287935&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll Fri Nov 25 10:03:27 2016
@@ -17,26 +17,26 @@
; Make sure scratch wave offset register is correctly incremented and
; then restored.
; SMEM: s_mov_b32 m0, s91{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill
+; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s91, 0x100{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill
+; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s91, 0x200{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill
+; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
; SMEM: s_add_u32 m0, s91, 0x300{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Spill
+; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
; SMEM: s_mov_b32 m0, s91{{$}}
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload
+; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s91, 0x100{{$}}
; SMEM: s_waitcnt lgkmcnt(0)
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload
+; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s91, 0x200{{$}}
; SMEM: s_waitcnt lgkmcnt(0)
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload
+; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
; SMEM: s_add_u32 m0, s91, 0x300{{$}}
; SMEM: s_waitcnt lgkmcnt(0)
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 4-byte Folded Reload
+; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
; ALL: s_endpgm
define void @test(i32 addrspace(1)* %out, i32 %in) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=287935&r1=287934&r2=287935&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Fri Nov 25 10:03:27 2016
@@ -20,8 +20,8 @@
; VI-DAG: s_mov_b32 s15, 0xe80000
; s11 is offset system SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Reload
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload
; GCN: NumVgprs: 256
; GCN: ScratchSize: 1024