[llvm] r179500 - Fix PPC64 CR spill location for callee-saved registers

Bill Schmidt wschmidt at linux.vnet.ibm.com
Mon Apr 15 07:42:43 PDT 2013


On Mon, 2013-04-15 at 02:07 +0000, Hal Finkel wrote:
> Author: hfinkel
> Date: Sun Apr 14 21:07:05 2013
> New Revision: 179500
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=179500&view=rev
> Log:
> Fix PPC64 CR spill location for callee-saved registers
> 
> This fixes an ABI bug for non-Darwin PPC64. For the callee-saved condition
> registers, the spill location is specified relative to the stack pointer (SP +
> 8). However, this is not relative to the SP after the new stack frame is
> established, but instead relative to the caller's stack pointer (it is stored
> into the linkage area of the parent's stack frame).
> 
> So, like with the link register, we don't directly spill the CRs with other
> callee-saved registers, but just mark them to be spilled during prologue
> generation.
> 
> In practice, this reverts r179457 for PPC64 (but leaves it in place for PPC32).
> 
> Modified:
>     llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
>     llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
>     llvm/trunk/test/CodeGen/PowerPC/crsave.ll
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp?rev=179500&r1=179499&r2=179500&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp Sun Apr 14 21:07:05 2013
> @@ -223,9 +223,7 @@ unsigned PPCFrameLowering::determineFram
> 
>    // If we are a leaf function, and use up to 224 bytes of stack space,
>    // don't have a frame pointer, calls, or dynamic alloca then we do not need
> -  // to adjust the stack pointer (we fit in the Red Zone).  For 64-bit
> -  // SVR4, we also require a stack frame if we need to spill the CR,
> -  // since this spill area is addressed relative to the stack pointer.
> +  // to adjust the stack pointer (we fit in the Red Zone).
>    // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
>    // stackless code if all local vars are reg-allocated.
>    bool DisableRedZone = MF.getFunction()->getAttributes().
> @@ -237,9 +235,6 @@ unsigned PPCFrameLowering::determineFram
>        FrameSize <= 224 &&                          // Fits in red zone.
>        !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
>        !MFI->adjustsStack() &&                      // No calls.
> -      !(Subtarget.isPPC64() &&                     // No 64-bit SVR4 CRsave.
> -	Subtarget.isSVR4ABI()
> -	&& spillsCR(MF)) &&

Hi Hal,

I don't understand why you removed this.  This constitutes a performance
regression.  We want to use the red zone if at all possible, and this
logic was added recently per a user request.  Please fix.  You can
either reinstate the call to FuncInfo->setSpillsCR() or check
MustSaveCRs.empty() here, right?

>        (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
>      // No need for frame
>      if (UpdateMF)
> @@ -373,6 +368,7 @@ void PPCFrameLowering::emitPrologue(Mach
>    // Check if the link register (LR) must be saved.
>    PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
>    bool MustSaveLR = FI->mustSaveLR();
> +  const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
>    // Do we have a frame pointer for this function?
>    bool HasFP = hasFP(MF);
> 
> @@ -394,6 +390,13 @@ void PPCFrameLowering::emitPrologue(Mach
>      if (MustSaveLR)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
> 
> +    if (!MustSaveCRs.empty()) {
> +      MachineInstrBuilder MIB =
> +        BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
> +      for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
> +        MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
> +    }
> +
>      if (HasFP)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
>          .addReg(PPC::X31)
> @@ -405,6 +408,12 @@ void PPCFrameLowering::emitPrologue(Mach
>          .addReg(PPC::X0)
>          .addImm(LROffset / 4)
>          .addReg(PPC::X1);
> +
> +    if (!MustSaveCRs.empty())
> +      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
> +        .addReg(PPC::X12, getKillRegState(true))
> +        .addImm(8)
> +        .addReg(PPC::X1);
>    } else {
>      if (MustSaveLR)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
> @@ -417,6 +426,9 @@ void PPCFrameLowering::emitPrologue(Mach
>          .addImm(FPOffset)
>          .addReg(PPC::R1);
> 
> +    assert(MustSaveCRs.empty() &&
> +           "Prologue CR saving supported only in 64-bit mode");
> +
>      if (MustSaveLR)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
>          .addReg(PPC::R0)
> @@ -580,7 +592,7 @@ void PPCFrameLowering::emitPrologue(Mach
>        // spilled CRs.
>        if (Subtarget.isSVR4ABI()
>  	  && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
> -	  && !spillsCR(MF))
> +	  && MustSaveCRs.empty())
>  	continue;
> 
>        // For 64-bit SVR4 when we have spilled CRs, the spill location
> @@ -636,6 +648,7 @@ void PPCFrameLowering::emitEpilogue(Mach
>    // Check if the link register (LR) has been saved.
>    PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
>    bool MustSaveLR = FI->mustSaveLR();
> +  const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
>    // Do we have a frame pointer for this function?
>    bool HasFP = hasFP(MF);
> 
> @@ -736,10 +749,19 @@ void PPCFrameLowering::emitEpilogue(Mach
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
>          .addImm(LROffset/4).addReg(PPC::X1);
> 
> +    if (!MustSaveCRs.empty())
> +      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
> +        .addImm(8).addReg(PPC::X1);
> +
>      if (HasFP)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
>          .addImm(FPOffset/4).addReg(PPC::X1);
> 
> +    if (!MustSaveCRs.empty())
> +      for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
> +        BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i])
> +          .addReg(PPC::X12, getKillRegState(i == e-1));
> +
>      if (MustSaveLR)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
>    } else {
> @@ -747,6 +769,9 @@ void PPCFrameLowering::emitEpilogue(Mach
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
>            .addImm(LROffset).addReg(PPC::R1);
> 
> +    assert(MustSaveCRs.empty() &&
> +           "Epilogue CR restoring supported only in 64-bit mode");
> +
>      if (HasFP)
>        BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
>            .addImm(FPOffset).addReg(PPC::R1);
> @@ -1139,23 +1164,13 @@ PPCFrameLowering::spillCalleeSavedRegist
> 
>      // Insert the spill to the stack frame.
>      if (IsCRField) {
> -      CRSpilled = true;
> -      // The first time we see a CR field, store the whole CR into the
> -      // save slot via GPR12 (available in the prolog for 32- and 64-bit).
> +      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
>        if (Subtarget.isPPC64()) {
> -	// 64-bit:  SP+8
> -        bool is31 = needsFP(*MF);
> -        unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
> -        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12)
> -                  .addReg(Reg, RegState::ImplicitKill);
> -
> -	MBB.insert(MI, CRMIB);
> -	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
> -			       .addReg(PPC::X12,
> -				       getKillRegState(true))
> -			       .addImm(8)
> -			       .addReg(FP8Reg));
> +        // The actual spill will happen at the start of the prologue.
> +        FuncInfo->addMustSaveCR(Reg);
>        } else {
> +        CRSpilled = true;
> +
>  	// 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
>  	// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
>  	CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
> @@ -1167,10 +1182,6 @@ PPCFrameLowering::spillCalleeSavedRegist
>  						 getKillRegState(true)),
>  					 CSI[i].getFrameIdx()));
>        }
> -      
> -      // Record that we spill the CR in this function.
> -      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
> -      FuncInfo->setSpillsCR();

By removing this last call, have you changed behavior anywhere else?
The value of this field is checked several times in
PPCFrameLowering.cpp, for instance.  Was this call redundant with the
calls in PPCInstrInfo.cpp?  Is the field itself redundant with
MustSaveCRs, and should that be checked instead in these places?  I
haven't tried to sort through it; I just want to be sure this has been
thought through.

Thanks,
Bill

>      } else {
>        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
>        TII.storeRegToStackSlot(MBB, MI, Reg, true,
> @@ -1192,15 +1203,10 @@ restoreCRs(bool isPPC64, bool is31,
>    DebugLoc DL;
>    unsigned RestoreOp, MoveReg;
> 
> -  if (isPPC64) {
> -    // 64-bit:  SP+8
> -    unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
> -    MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
> -	       .addImm(8)
> -	       .addReg(FP8Reg));
> -    RestoreOp = PPC::MTCRF8;
> -    MoveReg = PPC::X12;
> -  } else {
> +  if (isPPC64)
> +    // This is handled during epilogue generation.
> +    return;
> +  else {
>      // 32-bit:  FP-relative
>      MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
>  					     PPC::R12),
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h?rev=179500&r1=179499&r2=179500&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCMachineFunctionInfo.h Sun Apr 14 21:07:05 2013
> @@ -84,6 +84,11 @@ class PPCFunctionInfo : public MachineFu
>    /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
>    int CRSpillFrameIndex;
> 
> +  /// If any of CR[2-4] need to be saved in the prologue and restored in the
> +  /// epilogue then they are added to this array. This is used for the
> +  /// 64-bit SVR4 ABI.
> +  SmallVector<unsigned, 3> MustSaveCRs;
> +
>  public:
>    explicit PPCFunctionInfo(MachineFunction &MF) 
>      : FramePointerSaveIndex(0),
> @@ -154,6 +159,10 @@ public:
> 
>    int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
>    void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
> +
> +  const SmallVector<unsigned, 3> &
> +    getMustSaveCRs() const { return MustSaveCRs; }
> +  void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
>  };
> 
>  } // end of namespace llvm
> 
> Modified: llvm/trunk/test/CodeGen/PowerPC/crsave.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/crsave.ll?rev=179500&r1=179499&r2=179500&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/crsave.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/crsave.ll Sun Apr 14 21:07:05 2013
> @@ -1,5 +1,4 @@
>  ; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
> -; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64-FP
>  ; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
> 
>  declare void @foo()
> @@ -20,14 +19,11 @@ entry:
>  ; PPC32-NEXT: mtcrf 32, 12
> 
>  ; PPC64: mfcr 12
> -; PPC64-NEXT: stw 12, 8(1)
> +; PPC64: stw 12, 8(1)
> +; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
> +; PPC64: addi 1, 1, [[AMT]]
>  ; PPC64: lwz 12, 8(1)
> -; PPC64-NEXT: mtcrf 32, 12
> -
> -; PPC64-FP: mfcr 12
> -; PPC64-FP-NEXT: stw 12, 8(31)
> -; PPC64-FP: lwz 12, 8(31)
> -; PPC64-FP-NEXT: mtcrf 32, 12
> +; PPC64: mtcrf 32, 12
> 
>  define i32 @test_cr234() nounwind {
>  entry:
> @@ -47,16 +43,11 @@ entry:
>  ; PPC32-NEXT: mtcrf 8, 12
> 
>  ; PPC64: mfcr 12
> -; PPC64-NEXT: stw 12, 8(1)
> +; PPC64: stw 12, 8(1)
> +; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
> +; PPC64: addi 1, 1, [[AMT]]
>  ; PPC64: lwz 12, 8(1)
> -; PPC64-NEXT: mtcrf 32, 12
> -; PPC64-NEXT: mtcrf 16, 12
> -; PPC64-NEXT: mtcrf 8, 12
> -
> -; PPC64-FP: mfcr 12
> -; PPC64-FP-NEXT: stw 12, 8(31)
> -; PPC64-FP: lwz 12, 8(31)
> -; PPC64-FP-NEXT: mtcrf 32, 12
> -; PPC64-FP-NEXT: mtcrf 16, 12
> -; PPC64-FP-NEXT: mtcrf 8, 12
> +; PPC64: mtcrf 32, 12
> +; PPC64: mtcrf 16, 12
> +; PPC64: mtcrf 8, 12
> 
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 




More information about the llvm-commits mailing list