[PATCHES] R600/SI: Small VI improvements

Marek Olšák maraeo at gmail.com
Fri Mar 6 05:45:14 PST 2015


Well, the patch breaks a lot of tests due to some instructions being
scheduled differently for some reason. I'm fixing the tests.
Apparently, the reserved registers have an effect on the scheduler.

Marek

On Fri, Mar 6, 2015 at 1:24 PM, Marek Olšák <maraeo at gmail.com> wrote:
> An updated patch is attached. Please review.
>
> Marek
>
> On Wed, Mar 4, 2015 at 8:25 PM, Tom Stellard <tom at stellard.net> wrote:
>> On Wed, Mar 04, 2015 at 06:41:19PM +0100, Marek Olšák wrote:
>>> Please review.
>>>
>>> I'm not sure how important the second patch is.
>>>
>>> Marek
>>
>>> From c89e3bcc8475b3519967e1b34187164a20080250 Mon Sep 17 00:00:00 2001
>>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>>> Date: Wed, 4 Mar 2015 15:40:53 +0100
>>> Subject: [PATCH 1/2] R600/SI: Limit SGPRs to 80 on Tonga and Iceland
>>>
>>> This is a candidate for stable.
>>> ---
>>>  lib/Target/R600/SIRegisterInfo.cpp | 8 ++++++++
>>>  1 file changed, 8 insertions(+)
>>>
>>> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
>>> index e2138d2..4b9bee3 100644
>>> --- a/lib/Target/R600/SIRegisterInfo.cpp
>>> +++ b/lib/Target/R600/SIRegisterInfo.cpp
>>> @@ -47,6 +47,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
>>>    Reserved.set(AMDGPU::VGPR255);
>>>    Reserved.set(AMDGPU::VGPR254);
>>>
>>> +  // Tonga and Iceland can only allocate 80 SGPRs due to a hw bug.
>>> +  // That's 74 SGPRs if all XNACK_MASK, FLAT_SCRATCH, and VCC are used.
>>> +  // For now, assume XNACK_MASK is unused.
>>
>> This should be added as a subtarget feature in AMDGPU.td / AMDGPUSubtarget.h
>> and applied to these GPUs in Processors.td.
>>
>>> +  StringRef Cpu = ST.getTargetLowering()->getTargetMachine().getTargetCPU();
>>> +  if (Cpu == "tonga" || Cpu == "iceland")
>>> +    for (int i = AMDGPU::SGPR76; i <= AMDGPU::SGPR101; i++)
>>> +      Reserved.set(i);
>>> +
>>
>> You also need to reserve super registers.  Something like:
>>
>> for (unsigned i = 76, e = AMDGPU::SGPR_32RegClass.getReg(i); i !=e; ++i) {
>>   for (MCRegAliasIterator R = MCRegAliasIterator(i, this, true); R.isValid(); ++R) {
>>     Reserved.set(*R);
>>   }
>> }
>>
>> You should also update AMDGPUAsmPrinter.cpp to always report at least 80 SGPRs
>> used.
>>
>>>    return Reserved;
>>>  }
>>>
>>> --
>>> 2.1.0
>>>
>>
>>> From 932eaffc444fa581008c5039eaa63a6727beb534 Mon Sep 17 00:00:00 2001
>>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
>>> Date: Wed, 4 Mar 2015 17:59:50 +0100
>>> Subject: [PATCH 2/2] R600/SI: Fix getNumSGPRsAllowed for VI
>>>
>>
>> This function is only used by the scheduler and is important for getting
>> the best performance.
>>
>> LGTM.
>>
>>> ---
>>>  lib/Target/R600/SIRegisterInfo.cpp | 32 +++++++++++++++++++++-----------
>>>  lib/Target/R600/SIRegisterInfo.h   |  4 +++-
>>>  2 files changed, 24 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
>>> index 4b9bee3..14413e9 100644
>>> --- a/lib/Target/R600/SIRegisterInfo.cpp
>>> +++ b/lib/Target/R600/SIRegisterInfo.cpp
>>> @@ -14,7 +14,6 @@
>>>
>>>
>>>  #include "SIRegisterInfo.h"
>>> -#include "AMDGPUSubtarget.h"
>>>  #include "SIInstrInfo.h"
>>>  #include "SIMachineFunctionInfo.h"
>>>  #include "llvm/CodeGen/MachineFrameInfo.h"
>>> @@ -61,7 +60,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
>>>  unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
>>>
>>>    // FIXME: We should adjust the max number of waves based on LDS size.
>>> -  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
>>> +  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
>>> +                                          ST.getMaxWavesPerCU());
>>>    unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
>>>
>>>    for (regclass_iterator I = regclass_begin(), E = regclass_end();
>>> @@ -502,14 +502,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
>>>    }
>>>  }
>>>
>>> -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
>>> -  switch(WaveCount) {
>>> -    case 10: return 48;
>>> -    case 9:  return 56;
>>> -    case 8:  return 64;
>>> -    case 7:  return 72;
>>> -    case 6:  return 80;
>>> -    case 5:  return 96;
>>> -    default: return 103;
>>> +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
>>> +                                            unsigned WaveCount) const {
>>> +  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
>>> +    switch (WaveCount) {
>>> +      case 10: return 80;
>>> +      case 9:  return 80;
>>> +      case 8:  return 96;
>>> +      default: return 102;
>>> +    }
>>> +  } else {
>>> +    switch(WaveCount) {
>>> +      case 10: return 48;
>>> +      case 9:  return 56;
>>> +      case 8:  return 64;
>>> +      case 7:  return 72;
>>> +      case 6:  return 80;
>>> +      case 5:  return 96;
>>> +      default: return 103;
>>> +    }
>>>    }
>>>  }
>>> diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
>>> index d908ffd..1dfe530 100644
>>> --- a/lib/Target/R600/SIRegisterInfo.h
>>> +++ b/lib/Target/R600/SIRegisterInfo.h
>>> @@ -17,6 +17,7 @@
>>>  #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
>>>
>>>  #include "AMDGPURegisterInfo.h"
>>> +#include "AMDGPUSubtarget.h"
>>>  #include "llvm/Support/Debug.h"
>>>
>>>  namespace llvm {
>>> @@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
>>>
>>>    /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
>>>    ///        concurrent waves.
>>> -  unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
>>> +  unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
>>> +                              unsigned WaveCount) const;
>>>
>>>    unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
>>>                                const TargetRegisterClass *RC) const;
>>> --
>>> 2.1.0
>>>
>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>




More information about the llvm-commits mailing list