[PATCHES] R600/SI: Small VI improvements
Tom Stellard
tom at stellard.net
Fri Mar 6 09:50:21 PST 2015
On Fri, Mar 06, 2015 at 06:31:04PM +0100, Marek Olšák wrote:
> Thanks Tom for fixing the scheduler weirdness. The attached patch only
> adds the fix for the elf test.
>
> Marek
>
> On Fri, Mar 6, 2015 at 2:45 PM, Marek Olšák <maraeo at gmail.com> wrote:
> > Well, the patch breaks a lot of tests due to some instructions being
> > scheduled differently for some reason. I'm fixing the tests.
> > Apparently, the reserved registers have an effect on the scheduler.
> >
> > Marek
> >
> > On Fri, Mar 6, 2015 at 1:24 PM, Marek Olšák <maraeo at gmail.com> wrote:
> >> An updated patch is attached. Please review.
> >>
> >> Marek
> >>
> >> On Wed, Mar 4, 2015 at 8:25 PM, Tom Stellard <tom at stellard.net> wrote:
> >>> On Wed, Mar 04, 2015 at 06:41:19PM +0100, Marek Olšák wrote:
> >>>> Please review.
> >>>>
> >>>> I'm not sure how important the second patch is.
> >>>>
> >>>> Marek
> >>>
> >>>> From c89e3bcc8475b3519967e1b34187164a20080250 Mon Sep 17 00:00:00 2001
> >>>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> >>>> Date: Wed, 4 Mar 2015 15:40:53 +0100
> >>>> Subject: [PATCH 1/2] R600/SI: Limit SGPRs to 80 on Tonga and Iceland
> >>>>
> >>>> This is a candidate for stable.
> >>>> ---
> >>>> lib/Target/R600/SIRegisterInfo.cpp | 8 ++++++++
> >>>> 1 file changed, 8 insertions(+)
> >>>>
> >>>> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> >>>> index e2138d2..4b9bee3 100644
> >>>> --- a/lib/Target/R600/SIRegisterInfo.cpp
> >>>> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> >>>> @@ -47,6 +47,14 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> >>>> Reserved.set(AMDGPU::VGPR255);
> >>>> Reserved.set(AMDGPU::VGPR254);
> >>>>
> >>>> + // Tonga and Iceland can only allocate 80 SGPRs due to a hw bug.
> >>>> + // That's 74 SGPRs if all XNACK_MASK, FLAT_SCRATCH, and VCC are used.
> >>>> + // For now, assume XNACK_MASK is unused.
> >>>
> >>> This should be added as a subtarget feature in AMDGPU.td / AMDGPUSubtarget.h
> >>> and applied to these GPUs in Processors.td
> >>>
> >>>> + StringRef Cpu = ST.getTargetLowering()->getTargetMachine().getTargetCPU();
> >>>> + if (Cpu == "tonga" || Cpu == "iceland")
> >>>> + for (int i = AMDGPU::SGPR76; i <= AMDGPU::SGPR101; i++)
> >>>> + Reserved.set(i);
> >>>> +
> >>>
> >>> You also need to reserve super registers. Something like:
> >>>
> >>> for (unsigned i = 76, e = AMDGPU::SGPR_32RegClass.getReg(i); i !=e; ++i) {
> >>> for (MCRegAliasIterator R = MCRegAliasIterator(i, this, true); R.isValid() ++R) {
> >>> Reserved.set(*R);
> >>> }
> >>> }
> >>>
> >>> You should also update AMDGPUAsmPrinter.cpp to always report at least 80 SGPRs
> >>> used.
> >>>
> >>>> return Reserved;
> >>>> }
> >>>>
> >>>> --
> >>>> 2.1.0
> >>>>
> >>>
> >>>> From 932eaffc444fa581008c5039eaa63a6727beb534 Mon Sep 17 00:00:00 2001
> >>>> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> >>>> Date: Wed, 4 Mar 2015 17:59:50 +0100
> >>>> Subject: [PATCH 2/2] R600/SI: Fix getNumSGPRsAllowed for VI
> >>>>
> >>>
> >>> This function is only used by the scheduler and is important for getting
> >>> the best performance.
> >>>
> >>> LGTM.
> >>>
> >>>> ---
> >>>> lib/Target/R600/SIRegisterInfo.cpp | 32 +++++++++++++++++++++-----------
> >>>> lib/Target/R600/SIRegisterInfo.h | 4 +++-
> >>>> 2 files changed, 24 insertions(+), 12 deletions(-)
> >>>>
> >>>> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> >>>> index 4b9bee3..14413e9 100644
> >>>> --- a/lib/Target/R600/SIRegisterInfo.cpp
> >>>> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> >>>> @@ -14,7 +14,6 @@
> >>>>
> >>>>
> >>>> #include "SIRegisterInfo.h"
> >>>> -#include "AMDGPUSubtarget.h"
> >>>> #include "SIInstrInfo.h"
> >>>> #include "SIMachineFunctionInfo.h"
> >>>> #include "llvm/CodeGen/MachineFrameInfo.h"
> >>>> @@ -61,7 +60,8 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> >>>> unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
> >>>>
> >>>> // FIXME: We should adjust the max number of waves based on LDS size.
> >>>> - unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
> >>>> + unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
> >>>> + ST.getMaxWavesPerCU());
> >>>> unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
> >>>>
> >>>> for (regclass_iterator I = regclass_begin(), E = regclass_end();
> >>>> @@ -502,14 +502,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
> >>>> }
> >>>> }
> >>>>
> >>>> -unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
> >>>> - switch(WaveCount) {
> >>>> - case 10: return 48;
> >>>> - case 9: return 56;
> >>>> - case 8: return 64;
> >>>> - case 7: return 72;
> >>>> - case 6: return 80;
> >>>> - case 5: return 96;
> >>>> - default: return 103;
> >>>> +unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
> >>>> + unsigned WaveCount) const {
> >>>> + if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> >>>> + switch (WaveCount) {
> >>>> + case 10: return 80;
> >>>> + case 9: return 80;
> >>>> + case 8: return 96;
> >>>> + default: return 102;
> >>>> + }
> >>>> + } else {
> >>>> + switch(WaveCount) {
> >>>> + case 10: return 48;
> >>>> + case 9: return 56;
> >>>> + case 8: return 64;
> >>>> + case 7: return 72;
> >>>> + case 6: return 80;
> >>>> + case 5: return 96;
> >>>> + default: return 103;
> >>>> + }
> >>>> }
> >>>> }
> >>>> diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
> >>>> index d908ffd..1dfe530 100644
> >>>> --- a/lib/Target/R600/SIRegisterInfo.h
> >>>> +++ b/lib/Target/R600/SIRegisterInfo.h
> >>>> @@ -17,6 +17,7 @@
> >>>> #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
> >>>>
> >>>> #include "AMDGPURegisterInfo.h"
> >>>> +#include "AMDGPUSubtarget.h"
> >>>> #include "llvm/Support/Debug.h"
> >>>>
> >>>> namespace llvm {
> >>>> @@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
> >>>>
> >>>> /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
> >>>> /// concurrent waves.
> >>>> - unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
> >>>> + unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
> >>>> + unsigned WaveCount) const;
> >>>>
> >>>> unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
> >>>> const TargetRegisterClass *RC) const;
> >>>> --
> >>>> 2.1.0
> >>>>
> >>>
> >>>> _______________________________________________
> >>>> llvm-commits mailing list
> >>>> llvm-commits at cs.uiuc.edu
> >>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> >>>
> From d31e390cfe4c9adce557bea249d2afe80571074f Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Wed, 4 Mar 2015 15:40:53 +0100
> Subject: [PATCH] R600/SI: Limit SGPRs to 80 on Tonga and Iceland
>
> This is a candidate for stable.
LGTM. I'll make sure it gets into 3.6.1.
-Tom
> ---
> lib/Target/R600/AMDGPU.td | 5 +++++
> lib/Target/R600/AMDGPUAsmPrinter.cpp | 7 +++++++
> lib/Target/R600/AMDGPUSubtarget.cpp | 2 +-
> lib/Target/R600/AMDGPUSubtarget.h | 9 +++++++++
> lib/Target/R600/Processors.td | 8 ++++++--
> lib/Target/R600/SIRegisterInfo.cpp | 17 +++++++++++++++++
> test/CodeGen/R600/elf.ll | 9 ++++++---
> 7 files changed, 51 insertions(+), 6 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
> index a7d48b3..e5d5ce2 100644
> --- a/lib/Target/R600/AMDGPU.td
> +++ b/lib/Target/R600/AMDGPU.td
> @@ -103,6 +103,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
> "true",
> "Enable spilling of VGPRs to scratch memory">;
>
> +def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
> + "SGPRInitBug",
> + "true",
> + "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
> +
> class SubtargetFeatureFetchLimit <string Value> :
> SubtargetFeature <"fetch"#Value,
> "TexVTXClauseSize",
> diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> index 92bc314..5e1b6a3 100644
> --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -339,6 +339,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
> ProgInfo.NumVGPR = MaxVGPR + 1;
> ProgInfo.NumSGPR = MaxSGPR + 1;
>
> + if (STM.hasSGPRInitBug()) {
> + if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
> + llvm_unreachable("Too many SGPRs used with the SGPR init bug");
> +
> + ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
> + }
> +
> ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
> ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
> // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 70c8525..0ead652 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -70,7 +70,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
> CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
> EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
> WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
> - EnableVGPRSpilling(false),
> + EnableVGPRSpilling(false), SGPRInitBug(false),
> FrameLowering(TargetFrameLowering::StackGrowsUp,
> 64 * 16, // Maximum stack alignment (long16)
> 0),
> diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
> index 1b0122c..403a3e4 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.h
> +++ b/lib/Target/R600/AMDGPUSubtarget.h
> @@ -44,6 +44,10 @@ public:
> VOLCANIC_ISLANDS,
> };
>
> + enum {
> + FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
> + };
> +
> private:
> std::string DevName;
> bool Is64bit;
> @@ -66,6 +70,7 @@ private:
> bool CFALUBug;
> int LocalMemorySize;
> bool EnableVGPRSpilling;
> + bool SGPRInitBug;
>
> AMDGPUFrameLowering FrameLowering;
> std::unique_ptr<AMDGPUTargetLowering> TLInfo;
> @@ -206,6 +211,10 @@ public:
> return LocalMemorySize;
> }
>
> + bool hasSGPRInitBug() const {
> + return SGPRInitBug;
> + }
> +
> unsigned getAmdKernelCodeChipID() const;
>
> bool enableMachineScheduler() const override {
> diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
> index fb5aa61..82c6d13 100644
> --- a/lib/Target/R600/Processors.td
> +++ b/lib/Target/R600/Processors.td
> @@ -119,8 +119,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
> // Volcanic Islands
> //===----------------------------------------------------------------------===//
>
> -def : ProcessorModel<"tonga", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
> +def : ProcessorModel<"tonga", SIQuarterSpeedModel,
> + [FeatureVolcanicIslands, FeatureSGPRInitBug]
> +>;
>
> -def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
> +def : ProcessorModel<"iceland", SIQuarterSpeedModel,
> + [FeatureVolcanicIslands, FeatureSGPRInitBug]
> +>;
>
> def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index f3a4282..2571472 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -46,6 +46,23 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> Reserved.set(AMDGPU::VGPR255);
> Reserved.set(AMDGPU::VGPR254);
>
> + // Tonga and Iceland can only allocate a fixed number of SGPRs due
> + // to a hw bug.
> + if (ST.hasSGPRInitBug()) {
> + unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
> + // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
> + // Assume XNACK_MASK is unused.
> + unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
> +
> + for (unsigned i = Limit; i < NumSGPRs; ++i) {
> + unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
> + MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
> +
> + for (; R.isValid(); ++R)
> + Reserved.set(*R);
> + }
> + }
> +
> return Reserved;
> }
>
> diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
> index aca3109..f801b3f 100644
> --- a/test/CodeGen/R600/elf.ll
> +++ b/test/CodeGen/R600/elf.ll
> @@ -1,7 +1,9 @@
> ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
> -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s
> +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
> ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
> -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG %s
> +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TONGA %s
> +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
> +; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
>
> ; ELF: Format: ELF32
> ; ELF: Name: .AMDGPU.config
> @@ -15,7 +17,8 @@
> ; CONFIG: test:
> ; CONFIG: .section .AMDGPU.config
> ; CONFIG-NEXT: .long 45096
> -; CONFIG-NEXT: .long 0
> +; TYPICAL-NEXT: .long 0
> +; TONGA-NEXT: .long 576
> define void @test(i32 %p) #0 {
> %i = add i32 %p, 2
> %r = bitcast i32 %i to float
> --
> 2.1.0
>
More information about the llvm-commits
mailing list