[PATCH] R600/SI: Remove the M0Reg register class
Tom Stellard
tom at stellard.net
Thu Jun 12 08:08:01 PDT 2014
On Thu, Jun 12, 2014 at 08:27:47AM +0200, Christian König wrote:
> Am 11.06.2014 22:56, schrieb Tom Stellard:
> > This class only contined one register, M0, and was used to ensure
> > some operands always read from the M0 register. With the
> > MachineScheduler enabled we sometimes run into a situation where we
> > have to spill a M0Reg, which is not ideal, so instead of using this
> > special class, we use a custom inserter to force operands to M0.
>
> Mhm, I'm wondering why this should be better than spilling M0?
>
> It sounds to me that we now always insert a move from a SGPR to M0
> before the instruction which uses M0, while in the other case we only
> insert moves to spill M0 in some unusual cases.
>
The problem is code like this:
S_MOV_B32 %vreg0, 0 ; %vreg0 = M0RegClass
%vreg0 use
%vreg0 use
S_MOV_B32 %vreg1, 1 ; %vreg1 = M0RegClass
%vreg1 use
%vreg1 use
The scheduler will reorder the sequence like this, which requires a
spill:
S_MOV_B32 %vreg0, 0 ; %vreg0 = M0RegClass
S_MOV_B32 %vreg1, 1 ; %vreg1 = M0RegClass
%vreg0 use
%vreg0 use
%vreg1 use
%vreg1 use
In this case the spill is completely unnecessary, which is what I'm
trying to avoid.
-Tom
> Thanks,
> Christian.
>
> > ---
> > lib/Target/R600/SIFixSGPRCopies.cpp | 1 -
> > lib/Target/R600/SIISelLowering.cpp | 17 +++++++++++++++++
> > lib/Target/R600/SIInstructions.td | 34 +++++++++++++++++++++++-----------
> > lib/Target/R600/SIRegisterInfo.td | 5 ++---
> > 4 files changed, 42 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
> > index 5f71453..782f651 100644
> > --- a/lib/Target/R600/SIFixSGPRCopies.cpp
> > +++ b/lib/Target/R600/SIFixSGPRCopies.cpp
> > @@ -185,7 +185,6 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
> > const TargetRegisterClass *SrcRC;
> >
> > if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
> > - DstRC == &AMDGPU::M0RegRegClass ||
> > MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
> > return false;
> >
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 608aad2..01ce64e 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -545,6 +545,23 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> > .addImm(1) // CLAMP
> > .addImm(0); // OMOD
> > MI->eraseFromParent();
> > + break;
> > + }
> > + case AMDGPU::SI_MOV_M0: {
> > + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
> > + .addOperand(MI->getOperand(1));
> > +
> > + unsigned DstReg = MI->getOperand(0).getReg();
> > + for (MachineOperand &MO : MRI.use_operands(DstReg)) {
> > + // Clear the kill flag from each use. Since we are potentailly inserting
> > + // M0 uses into more than one instruction, then we need to remove the
> > + // kill flag otherwise all uses after the first will be considered dead,
> > + // and the machine verifier will complain.
> > + MO.setIsKill(false);
> > + }
> > + MRI.replaceRegWith(MI->getOperand(0).getReg(), AMDGPU::M0);
> > + MI->eraseFromParent();
> > + break;
> > }
> > }
> > return BB;
> > diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> > index 475f28e..8080d5b 100644
> > --- a/lib/Target/R600/SIInstructions.td
> > +++ b/lib/Target/R600/SIInstructions.td
> > @@ -434,11 +434,12 @@ def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
> > //def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
> >
> > let Uses = [EXEC] in {
> > - def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
> > - [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
> > +
> > +def S_SENDMSG : SOPP <
> > + 0x00000010, (ins SendMsgImm:$simm16, SReg_32:$m0), "S_SENDMSG $simm16", []
> > > {
> > - let DisableEncoding = "$m0";
> > - }
> > + let DisableEncoding = "$m0";
> > +}
> > } // End Uses = [EXEC]
> >
> > //def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
> > @@ -1064,7 +1065,7 @@ defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
> > def V_INTERP_P1_F32 : VINTRP <
> > 0x00000000,
> > (outs VReg_32:$dst),
> > - (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
> > + (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, SReg_32:$m0),
> > "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]",
> > []> {
> > let DisableEncoding = "$m0";
> > @@ -1073,7 +1074,7 @@ def V_INTERP_P1_F32 : VINTRP <
> > def V_INTERP_P2_F32 : VINTRP <
> > 0x00000001,
> > (outs VReg_32:$dst),
> > - (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
> > + (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$m0),
> > "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
> > []> {
> >
> > @@ -1085,7 +1086,7 @@ def V_INTERP_P2_F32 : VINTRP <
> > def V_INTERP_MOV_F32 : VINTRP <
> > 0x00000002,
> > (outs VReg_32:$dst),
> > - (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
> > + (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, SReg_32:$m0),
> > "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]",
> > []> {
> > let DisableEncoding = "$m0";
> > @@ -1520,6 +1521,9 @@ def V_SUB_F64 : InstSI <
> > []
> > >;
> >
> > +// Store a value in the M0 register
> > +def SI_MOV_M0 : InstSI <(outs SReg_32:$dst), (ins SReg_32:$src0), "", []>;
> > +
> > } // end usesCustomInserter
> >
> > multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
> > @@ -1635,6 +1639,13 @@ def : Pat <
> > >;
> >
> > //===----------------------------------------------------------------------===//
> > +// SOPP Patterns
> > +//===----------------------------------------------------------------------===//
> > +def : Pat <
> > + (int_SI_sendmsg imm:$simm16, i32:$m0),
> > + (S_SENDMSG imm:$simm16, (SI_MOV_M0 $m0))
> > +>;
> > +//===----------------------------------------------------------------------===//
> > // VOP2 Patterns
> > //===----------------------------------------------------------------------===//
> >
> > @@ -1987,15 +1998,16 @@ def : Pat <
> >
> > def : Pat <
> > (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
> > - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
> > + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (SI_MOV_M0 $params))
> > >;
> >
> > def : Pat <
> > - (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
> > + (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
> > (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
> > - imm:$attr_chan, imm:$attr, i32:$params),
> > + imm:$attr_chan, imm:$attr,
> > + (SI_MOV_M0 $params)),
> > (EXTRACT_SUBREG $ij, sub1),
> > - imm:$attr_chan, imm:$attr, $params)
> > + imm:$attr_chan, imm:$attr, (SI_MOV_M0 $params))
> > >;
> >
> > /********** ================== **********/
> > diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> > index f1f01de..d6fb960 100644
> > --- a/lib/Target/R600/SIRegisterInfo.td
> > +++ b/lib/Target/R600/SIRegisterInfo.td
> > @@ -151,15 +151,14 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
> > // Register classes used as source and destination
> > //===----------------------------------------------------------------------===//
> >
> > -// Special register classes for predicates and the M0 register
> > +// Special register classes for predicates.
> > def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)>;
> > def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
> > def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
> > -def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
> >
> > // Register class for all scalar registers (SGPRs + Special Registers)
> > def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
> > - (add SGPR_32, M0Reg, VCC_LO)
> > + (add SGPR_32, M0, VCC_LO)
> > >;
> >
> > def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list