[PATCHES] R600/SI: Small VI improvements
Marek Olšák
maraeo at gmail.com
Fri Mar 6 04:24:59 PST 2015
An updated patch is attached. Please review.
Marek
On Wed, Mar 4, 2015 at 8:25 PM, Tom Stellard <tom at stellard.net> wrote:
> On Wed, Mar 04, 2015 at 06:41:19PM +0100, Marek Olšák wrote:
>> Please review.
>>
>> I'm not sure how important the second patch is.
>>
>> Marek
>
>> From c89e3bcc8475b3519967e1b34187164a20080250 Mon Sep 17 00:00:00 2001
>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>> Date: Wed, 4 Mar 2015 15:40:53 +0100
>> Subject: [PATCH 1/2] R600/SI: Limit SGPRs to 80 on Tonga and Iceland
>>
>> This is a candidate for stable.
>> ---
>> lib/Target/R600/SIRegisterInfo.cpp | 8 ++++++++
>> 1 file changed, 8 insertions(+)
>>
>> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
>> index e2138d2..4b9bee3 100644
>> --- a/lib/Target/R600/SIRegisterInfo.cpp
>> +++ b/lib/Target/R600/SIRegisterInfo.cpp
>> @@ -47,6 +47,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
>> Reserved.set(AMDGPU::VGPR255);
>> Reserved.set(AMDGPU::VGPR254);
>>
>> + // Tonga and Iceland can only allocate 80 SGPRs due to a hw bug.
>> + // That's 74 SGPRs if all XNACK_MASK, FLAT_SCRATCH, and VCC are used.
>> + // For now, assume XNACK_MASK is unused.
>
> This should be added as a subtarget feature in AMDGPU.td / AMDGPUSubtarget.h
> and applied to these GPUs in Processors.td.
>
>> + StringRef Cpu = ST.getTargetLowering()->getTargetMachine().getTargetCPU();
>> + if (Cpu == "tonga" || Cpu == "iceland")
>> + for (int i = AMDGPU::SGPR76; i <= AMDGPU::SGPR101; i++)
>> + Reserved.set(i);
>> +
>
> You also need to reserve super registers. Something like:
>
> for (unsigned i = 76, e = AMDGPU::SGPR_32RegClass.getReg(i); i !=e; ++i) {
> for (MCRegAliasIterator R = MCRegAliasIterator(i, this, true); R.isValid(); ++R) {
> Reserved.set(*R);
> }
> }
>
> You should also update AMDGPUAsmPrinter.cpp to always report at least 80 SGPRs
> used.
>
>> return Reserved;
>> }
>>
>> --
>> 2.1.0
>>
>
>> From 932eaffc444fa581008c5039eaa63a6727beb534 Mon Sep 17 00:00:00 2001
>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>> Date: Wed, 4 Mar 2015 17:59:50 +0100
>> Subject: [PATCH 2/2] R600/SI: Fix getNumSGPRsAllowed for VI
>>
>
> This function is only used by the scheduler and is important for getting
> the best performance.
>
> LGTM.
>
>> ---
>> lib/Target/R600/SIRegisterInfo.cpp | 32 +++++++++++++++++++++-----------
>> lib/Target/R600/SIRegisterInfo.h | 4 +++-
>> 2 files changed, 24 insertions(+), 12 deletions(-)
>>
>> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
>> index 4b9bee3..14413e9 100644
>> --- a/lib/Target/R600/SIRegisterInfo.cpp
>> +++ b/lib/Target/R600/SIRegisterInfo.cpp
>> @@ -14,7 +14,6 @@
>>
>>
>> #include "SIRegisterInfo.h"
>> -#include "AMDGPUSubtarget.h"
>> #include "SIInstrInfo.h"
>> #include "SIMachineFunctionInfo.h"
>> #include "llvm/CodeGen/MachineFrameInfo.h"
>> @@ -61,7 +60,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
>> unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
>>
>> // FIXME: We should adjust the max number of waves based on LDS size.
>> - unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
>> + unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
>> + ST.getMaxWavesPerCU());
>> unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
>>
>> for (regclass_iterator I = regclass_begin(), E = regclass_end();
>> @@ -502,14 +502,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
>> }
>> }
>>
>> -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
>> - switch(WaveCount) {
>> - case 10: return 48;
>> - case 9: return 56;
>> - case 8: return 64;
>> - case 7: return 72;
>> - case 6: return 80;
>> - case 5: return 96;
>> - default: return 103;
>> +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
>> + unsigned WaveCount) const {
>> + if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
>> + switch (WaveCount) {
>> + case 10: return 80;
>> + case 9: return 80;
>> + case 8: return 96;
>> + default: return 102;
>> + }
>> + } else {
>> + switch(WaveCount) {
>> + case 10: return 48;
>> + case 9: return 56;
>> + case 8: return 64;
>> + case 7: return 72;
>> + case 6: return 80;
>> + case 5: return 96;
>> + default: return 103;
>> + }
>> }
>> }
>> diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
>> index d908ffd..1dfe530 100644
>> --- a/lib/Target/R600/SIRegisterInfo.h
>> +++ b/lib/Target/R600/SIRegisterInfo.h
>> @@ -17,6 +17,7 @@
>> #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
>>
>> #include "AMDGPURegisterInfo.h"
>> +#include "AMDGPUSubtarget.h"
>> #include "llvm/Support/Debug.h"
>>
>> namespace llvm {
>> @@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
>>
>> /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
>> /// concurrent waves.
>> - unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
>> + unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
>> + unsigned WaveCount) const;
>>
>> unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
>> const TargetRegisterClass *RC) const;
>> --
>> 2.1.0
>>
>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-R600-SI-Limit-SGPRs-to-80-on-Tonga-and-Iceland.patch
Type: text/x-patch
Size: 5475 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150306/fbe26756/attachment.bin>
More information about the llvm-commits mailing list