[llvm] r244578 - [X86] When optimizing for minsize, use POP for small post-call stack clean-up

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 19 11:14:18 PDT 2015


Out of curiosity, what is the performance impact of this change?  I 
could see an argument for doing this even in an -O2 build since a) the 
frontend should entirely handle the pop on modern intels, and b) it's a 
space savings.  I can't predict the outcome, but it seems like it might 
be worth measuring.

Philip

On 08/11/2015 01:48 AM, Michael Kuperstein via llvm-commits wrote:
> Author: mkuper
> Date: Tue Aug 11 03:48:48 2015
> New Revision: 244578
>
> URL: http://llvm.org/viewvc/llvm-project?rev=244578&view=rev
> Log:
> [X86] When optimizing for minsize, use POP for small post-call stack clean-up
>
> When optimizing for size, replace "addl $4, %esp" and "addl $8, %esp"
> following a call by one or two pops, respectively. We don't try to do it in
> general, but only when the stack adjustment immediately follows a call - which
> is the most common case.
>
> That allows taking a short-cut when trying to find a free register to pop into,
> instead of a full-blown liveness check. If the adjustment immediately follows a
> call, then every register the call clobbers but doesn't define should be dead at
> that point, and can be used.
>
> Differential Revision: http://reviews.llvm.org/D11749
>
> Added:
>      llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll
> Modified:
>      llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
>      llvm/trunk/lib/Target/X86/X86FrameLowering.h
>      llvm/trunk/test/CodeGen/X86/fold-push.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=244578&r1=244577&r2=244578&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Tue Aug 11 03:48:48 2015
> @@ -1851,6 +1851,69 @@ void X86FrameLowering::adjustForHiPEProl
>   #endif
>   }
>   
> +bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
> +    MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const {
> +
> +  if (Offset % SlotSize)
> +    return false;
> +
> +  int NumPops = Offset / SlotSize;
> +  // This is only worth it if we have at most 2 pops.
> +  if (NumPops != 1 && NumPops != 2)
> +    return false;
> +
> +  // Handle only the trivial case where the adjustment directly follows
> +  // a call. This is the most common one, anyway.
> +  if (MBBI == MBB.begin())
> +    return false;
> +  MachineBasicBlock::iterator Prev = std::prev(MBBI);
> +  if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
> +    return false;
> +
> +  unsigned Regs[2];
> +  unsigned FoundRegs = 0;
> +
> +  auto RegMask = Prev->getOperand(1);
> +
> +  // Try to find up to NumPops free registers.
> +  for (auto Candidate : X86::GR32_NOREX_NOSPRegClass) {
> +
> +    // Poor man's liveness:
> +    // Since we're immediately after a call, any register that is clobbered
> +    // by the call and not defined by it can be considered dead.
> +    if (!RegMask.clobbersPhysReg(Candidate))
> +      continue;
> +
> +    bool IsDef = false;
> +    for (const MachineOperand &MO : Prev->implicit_operands()) {
> +      if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) {
> +        IsDef = true;
> +        break;
> +      }
> +    }
> +
> +    if (IsDef)
> +      continue;
> +
> +    Regs[FoundRegs++] = Candidate;
> +    if (FoundRegs == (unsigned)NumPops)
> +      break;
> +  }
> +
> +  if (FoundRegs == 0)
> +    return false;
> +
> +  // If we found only one free register, but need two, reuse the same one twice.
> +  while (FoundRegs < (unsigned)NumPops)
> +    Regs[FoundRegs++] = Regs[0];
> +
> +  for (int i = 0; i < NumPops; ++i)
> +    BuildMI(MBB, MBBI, DL,
> +            TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
> +
> +  return true;
> +}
> +
>   void X86FrameLowering::
>   eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
>                                 MachineBasicBlock::iterator I) const {
> @@ -1882,8 +1945,12 @@ eliminateCallFramePseudoInstr(MachineFun
>       if (Amount) {
>         // Add Amount to SP to destroy a frame, and subtract to setup.
>         int Offset = isDestroy ? Amount : -Amount;
> -      BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
> +
> +      if (!(MF.getFunction()->optForMinSize() &&
> +            adjustStackWithPops(MBB, I, DL, Offset)))
> +        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
>       }
> +
>       return;
>     }
>   
>
> Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.h?rev=244578&r1=244577&r2=244578&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FrameLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86FrameLowering.h Tue Aug 11 03:48:48 2015
> @@ -146,6 +146,11 @@ private:
>                             MachineBasicBlock::iterator MBBI, DebugLoc DL,
>                             uint64_t MaxAlign) const;
>   
> +  /// Make small positive stack adjustments using POPs.
> +  bool adjustStackWithPops(MachineBasicBlock &MBB,
> +                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
> +                           int Offset) const;
> +
>     /// Adjusts the stack pointer using LEA, SUB, or ADD.
>     MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
>                                              MachineBasicBlock::iterator MBBI,
>
> Modified: llvm/trunk/test/CodeGen/X86/fold-push.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-push.ll?rev=244578&r1=244577&r2=244578&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fold-push.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fold-push.ll Tue Aug 11 03:48:48 2015
> @@ -27,11 +27,11 @@ define void @test_min(i32 %a, i32 %b) mi
>   ; CHECK: movl [[EAX:%e..]], (%esp)
>   ; CHECK-NEXT: pushl [[EAX]]
>   ; CHECK-NEXT: calll
> -; CHECK-NEXT: addl $4, %esp
> +; CHECK-NEXT: popl
>   ; CHECK: nop
>   ; CHECK: pushl (%esp)
>   ; CHECK: calll
> -; CHECK-NEXT: addl $4, %esp
> +; CHECK-NEXT: popl
>     %c = add i32 %a, %b
>     call void @foo(i32 %c)
>     call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
>
> Added: llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll?rev=244578&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll Tue Aug 11 03:48:48 2015
> @@ -0,0 +1,61 @@
> +; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
> +
> +declare void @param1(i32 %a)
> +declare i32 @param2_ret(i32 %a, i32 %b)
> +declare i64 @param2_ret64(i32 %a, i32 %b)
> +declare void @param2(i32 %a, i32 %b)
> +declare void @param3(i32 %a, i32 %b, i32 %c)
> +
> +define void @test() minsize {
> +; CHECK-LABEL: test:
> +; CHECK: calll _param1
> +; CHECK-NEXT: popl %eax
> +; CHECK: calll _param2
> +; CHECK-NEXT: popl %eax
> +; CHECK-NEXT: popl %ecx
> +; CHECK: calll _param2_ret
> +; CHECK-NEXT: popl %ecx
> +; CHECK-NEXT: popl %edx
> +; CHECK-NEXT: pushl %eax
> +; CHECK: calll _param3
> +; CHECK-NEXT: addl $12, %esp
> +; CHECK: calll _param2_ret64
> +; CHECK-NEXT: popl %ecx
> +; CHECK-NEXT: popl %ecx
> +  call void @param1(i32 1)
> +  call void @param2(i32 1, i32 2)
> +  %ret = call i32 @param2_ret(i32 1, i32 2)
> +  call void @param3(i32 1, i32 2, i32 %ret)
> +  %ret64 = call i64 @param2_ret64(i32 1, i32 2)
> +  ret void
> +}
> +
> +define void @negative(i32 %k) {
> +; CHECK-LABEL: negative:
> +; CHECK: calll _param1
> +; CHECK-NEXT: addl $4, %esp
> +; CHECK: calll _param2
> +; CHECK-NEXT: addl $8, %esp
> +; CHECK: calll _param3
> +; CHECK-NEXT: movl %ebp, %esp
> +  %v = alloca i32, i32 %k
> +  call void @param1(i32 1)
> +  call void @param2(i32 1, i32 2)
> +  call void @param3(i32 1, i32 2, i32 3)
> +  ret void
> +}
> +
> +define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
> +; CHECK-LABEL: spill:
> +; CHECK-DAG: movl %ecx,
> +; CHECK-DAG: movl %edx,
> +; CHECK: calll _param2_ret
> +; CHECK-NEXT: popl %ecx
> +; CHECK-NEXT: popl %edx
> +; CHECK-DAG: movl {{.*}}, %ecx
> +; CHECK-DAG: movl {{.*}}, %edx
> +; CHECK: calll _spill
> +  %i = call i32 @param2_ret(i32 1, i32 2)
> +  call void @spill(i32 %a, i32 %b, i32 %c)
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list