[PATCHES] R600/SI: Small VI improvements
Tom Stellard
tom at stellard.net
Wed Mar 4 11:25:20 PST 2015
On Wed, Mar 04, 2015 at 06:41:19PM +0100, Marek Olšák wrote:
> Please review.
>
> I'm not sure how important the second patch is.
>
> Marek
> From c89e3bcc8475b3519967e1b34187164a20080250 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Wed, 4 Mar 2015 15:40:53 +0100
> Subject: [PATCH 1/2] R600/SI: Limit SGPRs to 80 on Tonga and Iceland
>
> This is a candidate for stable.
> ---
> lib/Target/R600/SIRegisterInfo.cpp | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index e2138d2..4b9bee3 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -47,6 +47,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> Reserved.set(AMDGPU::VGPR255);
> Reserved.set(AMDGPU::VGPR254);
>
> + // Tonga and Iceland can only allocate 80 SGPRs due to a hw bug.
> + // That's 74 SGPRs if all XNACK_MASK, FLAT_SCRATCH, and VCC are used.
> + // For now, assume XNACK_MASK is unused.
The SGPR limit should be added as a subtarget feature in AMDGPU.td / AMDGPUSubtarget.h
and applied to these GPUs in Processors.td, rather than checking the CPU name here.
> + StringRef Cpu = ST.getTargetLowering()->getTargetMachine().getTargetCPU();
> + if (Cpu == "tonga" || Cpu == "iceland")
> + for (int i = AMDGPU::SGPR76; i <= AMDGPU::SGPR101; i++)
> + Reserved.set(i);
> +
You also need to reserve super registers. Something like:
for (unsigned i = 76, e = AMDGPU::SGPR_32RegClass.getReg(i); i !=e; ++i) {
for (MCRegAliasIterator R = MCRegAliasIterator(i, this, true); R.isValid(); ++R) {
Reserved.set(*R);
}
}
You should also update AMDGPUAsmPrinter.cpp to always report at least 80 SGPRs
used.
> return Reserved;
> }
>
> --
> 2.1.0
>
> From 932eaffc444fa581008c5039eaa63a6727beb534 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Wed, 4 Mar 2015 17:59:50 +0100
> Subject: [PATCH 2/2] R600/SI: Fix getNumSGPRsAllowed for VI
>
This function is only used by the scheduler and is important for getting
the best performance.
LGTM.
> ---
> lib/Target/R600/SIRegisterInfo.cpp | 32 +++++++++++++++++++++-----------
> lib/Target/R600/SIRegisterInfo.h | 4 +++-
> 2 files changed, 24 insertions(+), 12 deletions(-)
>
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index 4b9bee3..14413e9 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -14,7 +14,6 @@
>
>
> #include "SIRegisterInfo.h"
> -#include "AMDGPUSubtarget.h"
> #include "SIInstrInfo.h"
> #include "SIMachineFunctionInfo.h"
> #include "llvm/CodeGen/MachineFrameInfo.h"
> @@ -61,7 +60,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
>
> // FIXME: We should adjust the max number of waves based on LDS size.
> - unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
> + unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
> + ST.getMaxWavesPerCU());
> unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
>
> for (regclass_iterator I = regclass_begin(), E = regclass_end();
> @@ -502,14 +502,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
> }
> }
>
> -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
> - switch(WaveCount) {
> - case 10: return 48;
> - case 9: return 56;
> - case 8: return 64;
> - case 7: return 72;
> - case 6: return 80;
> - case 5: return 96;
> - default: return 103;
> +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
> + unsigned WaveCount) const {
> + if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> + switch (WaveCount) {
> + case 10: return 80;
> + case 9: return 80;
> + case 8: return 96;
> + default: return 102;
> + }
> + } else {
> + switch(WaveCount) {
> + case 10: return 48;
> + case 9: return 56;
> + case 8: return 64;
> + case 7: return 72;
> + case 6: return 80;
> + case 5: return 96;
> + default: return 103;
> + }
> }
> }
> diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
> index d908ffd..1dfe530 100644
> --- a/lib/Target/R600/SIRegisterInfo.h
> +++ b/lib/Target/R600/SIRegisterInfo.h
> @@ -17,6 +17,7 @@
> #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
>
> #include "AMDGPURegisterInfo.h"
> +#include "AMDGPUSubtarget.h"
> #include "llvm/Support/Debug.h"
>
> namespace llvm {
> @@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
>
> /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
> /// concurrent waves.
> - unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
> + unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
> + unsigned WaveCount) const;
>
> unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
> const TargetRegisterClass *RC) const;
> --
> 2.1.0
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list