PATCHES: R600/SI: Remove M0Reg register class
Matt Arsenault
Matthew.Arsenault at amd.com
Mon May 4 08:55:34 PDT 2015
On 05/04/2015 07:53 AM, Tom Stellard wrote:
> Hi,
>
> These patches remove the M0Reg register class from the R600/SI backend.
> This was a register class with a single register (m0), and it what used to
> model implicit usage of m0.
>
> In order to remove this register class, I have replaced all explicit m0 uses
> with implicit uses. This was achieved by glueing SDNodes with implicit
> m0 uses to SDNodes which copied values into m0. The MachineCSE pass
> takes care of eliminating unnecessary copies so code quality is on par with what
> was generated before.
>
> The benefit of removing this class is that we no longer have to deal with m0 register
> spills, which were often unnecessary and we can remove special handling for the M0Reg
> class from a few places in the backend.
>
> -Tom
>
> 0001-MachineCSE-Add-a-target-query-for-the-LookAheadLimit.patch
>
>
> From 6e5f3464ed07a3e36aa99167e73089377af7ad91 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 28 Apr 2015 12:41:39 +0000
> Subject: [PATCH 1/8] MachineCSE: Add a target query for the LookAheadLimit
> heurisitic
>
> This is used to determine whether or not to CSE physical register
> defs.
> ---
> include/llvm/Target/TargetInstrInfo.h | 8 ++++++++
> lib/CodeGen/MachineCSE.cpp | 5 +++--
> lib/Target/R600/SIInstrInfo.h | 2 ++
> 3 files changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
> index f9c1e52..7d95454 100644
> --- a/include/llvm/Target/TargetInstrInfo.h
> +++ b/include/llvm/Target/TargetInstrInfo.h
> @@ -1235,6 +1235,14 @@ public:
> return false;
> }
>
> + /// \brief Return the value to use for the MachineCSE's LookAheadLimit,
> + /// which is a heuristic used for CSE'ing phys reg defs.
> + virtual unsigned getMachineCSELookAheadLimit () const {
> + // 5 is the value for this heuristic that used to be hard-coded into
> + // the MachineCSE pass. I have no idea why 5 was choosen.
> + return 5;
> + }
> +
> private:
> int CallFrameSetupOpcode, CallFrameDestroyOpcode;
> };
> diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
> index f72d72a..87aaaa0 100644
> --- a/lib/CodeGen/MachineCSE.cpp
> +++ b/lib/CodeGen/MachineCSE.cpp
> @@ -48,7 +48,7 @@ namespace {
> MachineRegisterInfo *MRI;
> public:
> static char ID; // Pass identification
> - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
> + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) {
> initializeMachineCSEPass(*PassRegistry::getPassRegistry());
> }
>
> @@ -69,7 +69,7 @@ namespace {
> }
>
> private:
> - const unsigned LookAheadLimit;
> + unsigned LookAheadLimit;
> typedef RecyclingAllocator<BumpPtrAllocator,
> ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
> typedef ScopedHashTable<MachineInstr*, unsigned,
> @@ -716,5 +716,6 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
> MRI = &MF.getRegInfo();
> AA = &getAnalysis<AliasAnalysis>();
> DT = &getAnalysis<MachineDominatorTree>();
> + LookAheadLimit = TII->getMachineCSELookAheadLimit();
> return PerformCSE(DT->getRootNode());
> }
> diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
> index 7e049dc..a654166 100644
> --- a/lib/Target/R600/SIInstrInfo.h
> +++ b/lib/Target/R600/SIInstrInfo.h
> @@ -142,6 +142,8 @@ public:
> bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
> unsigned Reg, MachineRegisterInfo *MRI) const final;
>
> + unsigned getMachineCSELookAheadLimit() const override { return UINT_MAX; }
Using UINT_MAX for this seems like a bad idea
> +
> bool isSALU(uint16_t Opcode) const {
> return get(Opcode).TSFlags & SIInstrFlags::SALU;
> }
> -- 2.0.4
>
> 0002-R600-SI-Update-tablegen-defs-to-avoid-restoring-spil.patch
>
>
> From 287633eea69877c16fdfcf373a1a437413043cea Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 1 May 2015 16:51:39 +0000
> Subject: [PATCH 2/8] R600/SI: Update tablegen defs to avoid restoring spilled
> sgprs to m0
>
> We had code to do this in SIRegisterInfo::eliminateFrameIndex(), but
> it is easier to just change the definition of SI_SPILL_S32_RESTORE to
> only allow numbered sgprs.
> ---
> lib/Target/R600/SIInstructions.td | 5 ++++-
> lib/Target/R600/SIRegisterInfo.cpp | 8 --------
> 2 files changed, 4 insertions(+), 9 deletions(-)
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 838b645..93d9068 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -2041,7 +2041,10 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
> } // End UseNamedOperandTable = 1
> }
>
> -defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
> +// It's unclear whether you can use M0 as the output of v_readlane_b32
> +// instructions, so use SGPR_32 register class for spills to prevent
> +// this from happening.
> +defm SI_SPILL_S32 : SI_SPILL_SGPR <SGPR_32>;
> defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
> defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
> defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index 13a8974..db2ff0b 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -245,7 +245,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
> for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
> unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
> &AMDGPU::SGPR_32RegClass, i);
> - bool isM0 = SubReg == AMDGPU::M0;
> struct SIMachineFunctionInfo::SpilledReg Spill =
> MFI->getSpilledReg(MF, Index, i);
>
> @@ -254,19 +253,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
> Ctx.emitError("Ran out of VGPRs for spilling SGPR");
> }
>
> - if (isM0)
> - SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
> -
> BuildMI(*MBB, MI, DL,
> TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
> SubReg)
> .addReg(Spill.VGPR)
> .addImm(Spill.Lane)
> .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
> - if (isM0) {
> - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
> - .addReg(SubReg);
> - }
> }
>
> // TODO: only do this when it is needed
> -- 2.0.4
>
> 0003-R600-SI-Replace-TRI-getRegClass-Reg-with-TRI-getPhys.patch
>
>
> From 3871e266f62995517bd671d4188b3357cbf78a41 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 14 Apr 2015 15:54:02 +0000
> Subject: [PATCH 3/8] R600/SI: Replace TRI->getRegClass(Reg) with
> TRI->getPhysRegClass(Reg)
>
> TRI->getRegClass() takes a register class ID, not a register. We were
> using this incorrectly in a few places.
> ---
> lib/Target/R600/SIFixSGPRCopies.cpp | 13 ++++++++-----
> lib/Target/R600/SIFoldOperands.cpp | 4 ++--
> lib/Target/R600/SIRegisterInfo.cpp | 1 +
> 3 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
> index cd1b3ac..142b143 100644
> --- a/lib/Target/R600/SIFixSGPRCopies.cpp
> +++ b/lib/Target/R600/SIFixSGPRCopies.cpp
> @@ -140,7 +140,7 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
> const TargetRegisterClass *RC
> = TargetRegisterInfo::isVirtualRegister(Reg) ?
> MRI.getRegClass(Reg) :
> - TRI->getRegClass(Reg);
> + TRI->getPhysRegClass(Reg);
>
> RC = TRI->getSubRegClass(RC, SubReg);
> for (MachineRegisterInfo::use_instr_iterator
> @@ -183,10 +183,13 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
> unsigned SrcReg = Copy.getOperand(1).getReg();
> unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
>
> - const TargetRegisterClass *DstRC
> - = TargetRegisterInfo::isVirtualRegister(DstReg) ?
> - MRI.getRegClass(DstReg) :
> - TRI->getRegClass(DstReg);
> + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
> + // If the destination register is a physical register there isn't really
> + // much we can do to fix this.
> + return false;
> + }
> +
> + const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
>
> const TargetRegisterClass *SrcRC;
>
> diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
> index 7ba5a6d..d14e37a 100644
> --- a/lib/Target/R600/SIFoldOperands.cpp
> +++ b/lib/Target/R600/SIFoldOperands.cpp
> @@ -216,7 +216,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
> const TargetRegisterClass *UseRC
> = TargetRegisterInfo::isVirtualRegister(UseReg) ?
> MRI.getRegClass(UseReg) :
> - TRI.getRegClass(UseReg);
> + TRI.getPhysRegClass(UseReg);
>
> Imm = APInt(64, OpToFold.getImm());
>
> @@ -240,7 +240,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
> const TargetRegisterClass *DestRC
> = TargetRegisterInfo::isVirtualRegister(DestReg) ?
> MRI.getRegClass(DestReg) :
> - TRI.getRegClass(DestReg);
> + TRI.getPhysRegClass(DestReg);
>
> unsigned MovOp = TII->getMovOpcode(DestRC);
> if (MovOp == AMDGPU::COPY)
> diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
> index db2ff0b..62915ee 100644
> --- a/lib/Target/R600/SIRegisterInfo.cpp
> +++ b/lib/Target/R600/SIRegisterInfo.cpp
> @@ -339,6 +339,7 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
> assert(!TargetRegisterInfo::isVirtualRegister(Reg));
>
> static const TargetRegisterClass *BaseClasses[] = {
> + &AMDGPU::M0RegRegClass,
> &AMDGPU::VGPR_32RegClass,
> &AMDGPU::SReg_32RegClass,
> &AMDGPU::VReg_64RegClass,
> -- 2.0.4
>
> 0004-R600-SI-Remove-explicit-m0-operand-from-s_sendmsg.patch
>
>
> From 01f212d6402d2a6434676ff6f355a1fac6a437c3 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 14 Apr 2015 15:57:00 +0000
> Subject: [PATCH 4/8] R600/SI: Remove explicit m0 operand from s_sendmsg
>
> Instead add m0 as an implicit operand. This allows us to avoid using
> the M0Reg register class and eliminates a number of unnecessary spills
> when using s_sendmsg instructions. This impacts one shader in the
> shader-db:
>
> SGPRS: 48 -> 40 (-16.67 %)
> VGPRS: 112 -> 108 (-3.57 %)
> Code Size: 40132 -> 38796 (-3.33 %) bytes
> LDS: 0 -> 0 (0.00 %) blocks
> Scratch: 2048 -> 0 (-100.00 %) bytes per wave
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 1 +
> lib/Target/R600/AMDGPUISelLowering.h | 1 +
> lib/Target/R600/AMDGPUInstrInfo.td | 4 ++++
> lib/Target/R600/SIISelLowering.cpp | 25 ++++++++++++++++++++++++-
> lib/Target/R600/SIISelLowering.h | 1 +
> lib/Target/R600/SIInstructions.td | 12 +++++-------
> 6 files changed, 36 insertions(+), 8 deletions(-)
LGTM
>
> 0005-R600-SI-Make-sendmsg-test-more-strict.patch
>
>
> From 70f1e79acf94cf7cbdf9347c045d5bff9802f451 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 14 Apr 2015 17:20:43 +0000
> Subject: [PATCH 5/8] R600/SI: Make sendmsg test more strict
>
> We want to make sure that the m0 copies are being cse'd.
> ---
> test/CodeGen/R600/llvm.SI.sendmsg.ll | 2 ++
> 1 file changed, 2 insertions(+)
>
LGTM
> 0006-R600-SI-Remove-explicit-m0-operand-from-v_interp-ins.patch
>
>
> From c69fa555b7076e40275fd2cc8655f8a56a12248a Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 14 Apr 2015 23:11:50 +0000
> Subject: [PATCH 6/8] R600/SI: Remove explicit m0 operand from v_interp
> instructions
>
> Instead add m0 as an implicit operand. This helps avoid spills
> of the m0 register in some cases.
> ---
> lib/Target/R600/AMDGPUISelLowering.cpp | 3 +++
> lib/Target/R600/AMDGPUISelLowering.h | 3 +++
> lib/Target/R600/AMDGPUInstrInfo.td | 12 +++++++++
> lib/Target/R600/SIISelLowering.cpp | 23 ++++++++++++++++-
> lib/Target/R600/SIInstrInfo.td | 4 +--
> lib/Target/R600/SIInstructions.td | 47 ++++++++++++----------------------
> 6 files changed, 59 insertions(+), 33 deletions(-)
>
LGTM
>
> 0007-R600-SI-Remove-explicit-m0-operand-from-DS-instructi.patch
>
>
> From b1f87d5505b06cc55d406e49bccb567512671f03 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Tue, 28 Apr 2015 12:42:21 +0000
> Subject: [PATCH 7/8] R600/SI: Remove explicit m0 operand from DS instructions
>
> Instead add m0 as an implicit operand. This helps avoid spills
> of the m0 register in some cases.
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 79 ++++++++++++++-----
> lib/Target/R600/AMDGPUInstructions.td | 37 +++++----
> lib/Target/R600/SIInstrFormats.td | 2 +-
> lib/Target/R600/SIInstrInfo.td | 125 +++++++++++++++++++++++++++----
> lib/Target/R600/SIInstructions.td | 96 ++++++++++++------------
> lib/Target/R600/SILoadStoreOptimizer.cpp | 38 +++++-----
> test/CodeGen/R600/ds_read2st64.ll | 2 +-
> test/CodeGen/R600/shl_add_ptr.ll | 2 +-
> 8 files changed, 261 insertions(+), 120 deletions(-)
>
LGTM
> +//===----------------------------------------------------------------------===//
> +// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
> +// to be glued to the memory instructions.
> +//===----------------------------------------------------------------------===//
> +
> +def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
> + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
> +>;
> +
> +def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
> + return isLocalLoad(cast<LoadSDNode>(N));
> +}]>;
I think this should be simplified to
cast<LoadSDNode>(N)->getMemOperand().getAddrSpace() ==
AMDGPUAS::LOCAL_ADDRESS,
and the various is*Loads be removed. This is still leftover from AMDIL
and for some reason looks at the Value for the memoperand.
>
> From 9c68951c5c29fde832830b0bfd28e3097f1b8278 Mon Sep 17 00:00:00 2001
> From: Tom Stellard<thomas.stellard at amd.com>
> Date: Fri, 1 May 2015 13:21:31 +0000
> Subject: [PATCH 8/8] R600/SI: Remove M0Reg register class
>
> It is no longer used.
> ---
> lib/Target/R600/SIFixSGPRCopies.cpp | 1 -
> lib/Target/R600/SIRegisterInfo.cpp | 1 -
> lib/Target/R600/SIRegisterInfo.td | 3 +--
> test/CodeGen/R600/ds_read2st64.ll | 2 +-
> 4 files changed, 2 insertions(+), 5 deletions(-)
LGTM
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150504/3f313073/attachment.html>
More information about the llvm-commits
mailing list