[PATCHES] R600/SI: Small VI improvements

Tom Stellard tom at stellard.net
Wed Mar 4 11:25:20 PST 2015


On Wed, Mar 04, 2015 at 06:41:19PM +0100, Marek Olšák wrote:
> Please review.
> 
> I'm not sure how important the second patch is.
> 
> Marek

> From c89e3bcc8475b3519967e1b34187164a20080250 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Wed, 4 Mar 2015 15:40:53 +0100
> Subject: [PATCH 1/2] R600/SI: Limit SGPRs to 80 on Tonga and Iceland
> 
> This is a candidate for stable.
> ---
>  lib/Target/R600/SIRegisterInfo.cpp | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index e2138d2..4b9bee3 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -47,6 +47,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
>    Reserved.set(AMDGPU::VGPR255);
>    Reserved.set(AMDGPU::VGPR254);
>  
> +  // Tonga and Iceland can only allocate 80 SGPRs due to a hw bug.
> +  // That's 74 SGPRs if all XNACK_MASK, FLAT_SCRATCH, and VCC are used.
> +  // For now, assume XNACK_MASK is unused.

This should be added as a subtarget feature in AMDGPU.td / AMDGPUSubtarget.h
and applied to these GPUs in Processors.td.

> +  StringRef Cpu = ST.getTargetLowering()->getTargetMachine().getTargetCPU();
> +  if (Cpu == "tonga" || Cpu == "iceland")
> +    for (int i = AMDGPU::SGPR76; i <= AMDGPU::SGPR101; i++)
> +      Reserved.set(i);
> +

You also need to reserve super registers.  Something like:

for (unsigned i = 76, e = AMDGPU::SGPR_32RegClass.getReg(i); i !=e; ++i) {
  for (MCRegAliasIterator R = MCRegAliasIterator(i, this, true); R.isValid(); ++R) {
    Reserved.set(*R);
  }
}

You should also update AMDGPUAsmPrinter.cpp to always report at least 80 SGPRs
used.

>    return Reserved;
>  }
>  
> -- 
> 2.1.0
> 

> From 932eaffc444fa581008c5039eaa63a6727beb534 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Wed, 4 Mar 2015 17:59:50 +0100
> Subject: [PATCH 2/2] R600/SI: Fix getNumSGPRsAllowed for VI
> 

This function is only used by the scheduler and is important for getting
the best performance.

LGTM.

> ---
>  lib/Target/R600/SIRegisterInfo.cpp | 32 +++++++++++++++++++++-----------
>  lib/Target/R600/SIRegisterInfo.h   |  4 +++-
>  2 files changed, 24 insertions(+), 12 deletions(-)
> 
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index 4b9bee3..14413e9 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -14,7 +14,6 @@
>  
>  
>  #include "SIRegisterInfo.h"
> -#include "AMDGPUSubtarget.h"
>  #include "SIInstrInfo.h"
>  #include "SIMachineFunctionInfo.h"
>  #include "llvm/CodeGen/MachineFrameInfo.h"
> @@ -61,7 +60,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
>  unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
>  
>    // FIXME: We should adjust the max number of waves based on LDS size.
> -  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
> +  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
> +                                          ST.getMaxWavesPerCU());
>    unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
>  
>    for (regclass_iterator I = regclass_begin(), E = regclass_end();
> @@ -502,14 +502,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
>    }
>  }
>  
> -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
> -  switch(WaveCount) {
> -    case 10: return 48;
> -    case 9:  return 56;
> -    case 8:  return 64;
> -    case 7:  return 72;
> -    case 6:  return 80;
> -    case 5:  return 96;
> -    default: return 103;
> +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
> +                                            unsigned WaveCount) const {
> +  if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> +    switch (WaveCount) {
> +      case 10: return 80;
> +      case 9:  return 80;
> +      case 8:  return 96;
> +      default: return 102;
> +    }
> +  } else {
> +    switch(WaveCount) {
> +      case 10: return 48;
> +      case 9:  return 56;
> +      case 8:  return 64;
> +      case 7:  return 72;
> +      case 6:  return 80;
> +      case 5:  return 96;
> +      default: return 103;
> +    }
>    }
>  }
> diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
> index d908ffd..1dfe530 100644
> --- a/lib/Target/R600/SIRegisterInfo.h
> +++ b/lib/Target/R600/SIRegisterInfo.h
> @@ -17,6 +17,7 @@
>  #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
>  
>  #include "AMDGPURegisterInfo.h"
> +#include "AMDGPUSubtarget.h"
>  #include "llvm/Support/Debug.h"
>  
>  namespace llvm {
> @@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
>  
>    /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
>    ///        concurrent waves.
> -  unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
> +  unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
> +                              unsigned WaveCount) const;
>  
>    unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
>                                const TargetRegisterClass *RC) const;
> -- 
> 2.1.0
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list