[llvm] r366223 - [AMDGPU] Add the adjusted FP as a livein register.
Michael Liao via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 08:57:12 PDT 2019
Author: hliao
Date: Tue Jul 16 08:57:12 2019
New Revision: 366223
URL: http://llvm.org/viewvc/llvm-project?rev=366223&view=rev
Log:
[AMDGPU] Add the adjusted FP as a livein register.
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64145
Added:
llvm/trunk/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=366223&r1=366222&r2=366223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Tue Jul 16 08:57:12 2019
@@ -1067,15 +1067,15 @@ bool GCNTargetMachine::parseMachineFunct
auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
const TargetRegisterClass &RC,
- ArgDescriptor &Arg) {
+ ArgDescriptor &Arg, unsigned UserSGPRs,
+ unsigned SystemSGPRs) {
// Skip parsing if it's not present.
if (!A)
return false;
if (A->IsRegister) {
unsigned Reg;
- if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value,
- Error)) {
+ if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
SourceRange = A->RegisterName.SourceRange;
return true;
}
@@ -1088,60 +1088,62 @@ bool GCNTargetMachine::parseMachineFunct
if (A->Mask)
Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
+ MFI->NumUserSGPRs += UserSGPRs;
+ MFI->NumSystemSGPRs += SystemSGPRs;
return false;
};
if (YamlMFI.ArgInfo &&
(parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
AMDGPU::SReg_128RegClass,
- MFI->ArgInfo.PrivateSegmentBuffer) ||
+ MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
- AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.DispatchPtr) ||
+ AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
+ 2, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.QueuePtr) ||
+ MFI->ArgInfo.QueuePtr, 2, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.KernargSegmentPtr) ||
+ MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
- AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.DispatchID) ||
+ AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
+ 2, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.FlatScratchInit) ||
+ MFI->ArgInfo.FlatScratchInit, 2, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
AMDGPU::SGPR_32RegClass,
- MFI->ArgInfo.PrivateSegmentSize) ||
+ MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
- AMDGPU::SGPR_32RegClass,
- MFI->ArgInfo.WorkGroupIDX) ||
+ AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
+ 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
- AMDGPU::SGPR_32RegClass,
- MFI->ArgInfo.WorkGroupIDY) ||
+ AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
+ 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
- AMDGPU::SGPR_32RegClass,
- MFI->ArgInfo.WorkGroupIDZ) ||
+ AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
+ 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
AMDGPU::SGPR_32RegClass,
- MFI->ArgInfo.WorkGroupInfo) ||
+ MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
AMDGPU::SGPR_32RegClass,
- MFI->ArgInfo.PrivateSegmentWaveByteOffset) ||
+ MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.ImplicitArgPtr) ||
+ MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
AMDGPU::SReg_64RegClass,
- MFI->ArgInfo.ImplicitBufferPtr) ||
+ MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
AMDGPU::VGPR_32RegClass,
- MFI->ArgInfo.WorkItemIDX) ||
+ MFI->ArgInfo.WorkItemIDX, 0, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
AMDGPU::VGPR_32RegClass,
- MFI->ArgInfo.WorkItemIDY) ||
+ MFI->ArgInfo.WorkItemIDY, 0, 0) ||
parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
AMDGPU::VGPR_32RegClass,
- MFI->ArgInfo.WorkItemIDZ)))
+ MFI->ArgInfo.WorkItemIDZ, 0, 0)))
return true;
MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=366223&r1=366222&r2=366223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Tue Jul 16 08:57:12 2019
@@ -311,7 +311,8 @@ unsigned SIFrameLowering::getReservedPri
}
// Shift down registers reserved for the scratch wave offset.
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+std::pair<unsigned, bool>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -322,17 +323,17 @@ unsigned SIFrameLowering::getReservedPri
// No replacement necessary.
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
(!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
- return AMDGPU::NoRegister;
+ return std::make_pair(AMDGPU::NoRegister, false);
}
if (ST.hasSGPRInitBug())
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, false);
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, false);
AllSGPRs = AllSGPRs.slice(NumPreloaded);
@@ -353,10 +354,11 @@ unsigned SIFrameLowering::getReservedPri
unsigned ReservedRegCount = 13;
if (AllSGPRs.size() < ReservedRegCount)
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, false);
bool HandledScratchWaveOffsetReg =
ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ bool FPAdjusted = false;
for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
@@ -374,12 +376,13 @@ unsigned SIFrameLowering::getReservedPri
MFI->setScratchWaveOffsetReg(Reg);
MFI->setFrameOffsetReg(Reg);
ScratchWaveOffsetReg = Reg;
+ FPAdjusted = true;
break;
}
}
}
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, FPAdjusted);
}
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
@@ -415,7 +418,9 @@ void SIFrameLowering::emitEntryFunctionP
unsigned ScratchRsrcReg
= getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
- unsigned ScratchWaveOffsetReg =
+ unsigned ScratchWaveOffsetReg;
+ bool FPAdjusted;
+ std::tie(ScratchWaveOffsetReg, FPAdjusted) =
getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
// We need to insert initialization of the scratch resource descriptor.
@@ -453,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionP
if (&OtherBB == &MBB)
continue;
- if (OffsetRegUsed)
+ if (OffsetRegUsed || FPAdjusted)
OtherBB.addLiveIn(ScratchWaveOffsetReg);
if (ResourceRegUsed)
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h?rev=366223&r1=366222&r2=366223&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h Tue Jul 16 08:57:12 2019
@@ -66,7 +66,7 @@ private:
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const;
- unsigned getReservedPrivateSegmentWaveByteOffsetReg(
+ std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg(
const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
SIMachineFunctionInfo *MFI, MachineFunction &MF) const;
Added: llvm/trunk/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir?rev=366223&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir Tue Jul 16 08:57:12 2019
@@ -0,0 +1,50 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+
+
+# CHECK-LABEL: name: foo
+# CHECK: BUFFER_STORE_DWORD_OFFSET
+--- |
+
+ define amdgpu_kernel void @foo() #0 {
+ ret void
+ }
+
+ attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
+...
+---
+name: foo
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+ - { reg: '$sgpr4_sgpr5' }
+ - { reg: '$sgpr6_sgpr7' }
+ - { reg: '$sgpr8' }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4 }
+machineFunctionInfo:
+ explicitKernArgSize: 660
+ maxKernArgAlign: 4
+ isEntryFunction: true
+ waveLimiter: true
+ scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+ scratchWaveOffsetReg: '$sgpr101'
+ frameOffsetReg: '$sgpr101'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
+ workGroupIDX: { reg: '$sgpr8' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr9' }
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7
+
+ bb.1:
+ liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7
+
+ renamable $vgpr2 = IMPLICIT_DEF
+ SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
More information about the llvm-commits mailing list