[llvm] r266774 - Preliminary changes for fixing PR27241. Generalized/restructured some things
David L Kreitzer via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 19 10:43:45 PDT 2016
Author: dlkreitz
Date: Tue Apr 19 12:43:44 2016
New Revision: 266774
URL: http://llvm.org/viewvc/llvm-project?rev=266774&view=rev
Log:
Preliminary changes for fixing PR27241. Generalized/restructured some things
in preparation for enabling the outgoing parameter store-to-push optimization
for 64-bit targets.
Differential Revision: http://reviews.llvm.org/D19222
Modified:
llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp
Modified: llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp?rev=266774&r1=266773&r2=266774&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp Tue Apr 19 12:43:44 2016
@@ -10,7 +10,7 @@
// This file defines a pass that optimizes call sequences on x86.
// Currently, it converts movs of function parameters onto the stack into
// pushes. This is beneficial for two main reasons:
-// 1) The push instruction encoding is much smaller than an esp-relative mov
+// 1) The push instruction encoding is much smaller than a stack-ptr-based mov.
// 2) It is possible to push memory arguments directly. So, if the
// the transformation is performed pre-reg-alloc, it can help relieve
// register pressure.
@@ -106,6 +106,8 @@ private:
const X86FrameLowering *TFL;
const X86Subtarget *STI;
const MachineRegisterInfo *MRI;
+ unsigned SlotSize;
+ unsigned Log2SlotSize;
static char ID;
};
@@ -207,7 +209,7 @@ bool X86CallFrameOptimization::isProfita
Advantage -= 3;
// Now, for each push, we save ~3 bytes. For small constants, we actually,
// save more (up to 5 bytes), but 3 should be a good approximation.
- Advantage += (CC.ExpectedDist / 4) * 3;
+ Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
}
}
@@ -220,6 +222,12 @@ bool X86CallFrameOptimization::runOnMach
TFL = STI->getFrameLowering();
MRI = &MF.getRegInfo();
+ const X86RegisterInfo &RegInfo =
+ *static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
+ SlotSize = RegInfo.getSlotSize();
+ assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
+ Log2SlotSize = Log2_32(SlotSize);
+
if (!isLegal(MF))
return false;
@@ -322,7 +330,8 @@ void X86CallFrameOptimization::collectCa
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
- unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
+ unsigned int MaxAdjust =
+ FrameSetup->getOperand(0).getImm() >> Log2SlotSize;
// A zero adjustment means no stack parameters
if (!MaxAdjust) {
@@ -347,8 +356,8 @@ void X86CallFrameOptimization::collectCa
unsigned StackPtr = Context.SPCopy->getOperand(0).getReg();
// Scan the call setup sequence for the pattern we're looking for.
- // We only handle a simple case - a sequence of MOV32mi or MOV32mr
- // instructions, that push a sequence of 32-bit values onto the stack, with
+ // We only handle a simple case - a sequence of store instructions that
+ // push a sequence of stack-slot-aligned values onto the stack, with
// no gaps between them.
if (MaxAdjust > 4)
Context.MovVector.resize(MaxAdjust, nullptr);
@@ -363,9 +372,9 @@ void X86CallFrameOptimization::collectCa
continue;
}
- // We know the instruction is a MOV32mi/MOV32mr.
+ // We know the instruction has a supported store opcode.
// We only want movs of the form:
- // movl imm/r32, k(%esp)
+ // mov imm/reg, k(%StackPtr)
// If we run into something else, bail.
// Note that AddrBaseReg may, counter to its name, not be a register,
// but rather a frame index.
@@ -386,9 +395,9 @@ void X86CallFrameOptimization::collectCa
"Negative stack displacement when passing parameters");
// We really don't want to consider the unaligned case.
- if (StackDisp % 4)
+ if (StackDisp & (SlotSize - 1))
return;
- StackDisp /= 4;
+ StackDisp >>= Log2SlotSize;
assert((size_t)StackDisp < Context.MovVector.size() &&
"Function call has more parameters than the stack is adjusted for.");
@@ -419,9 +428,9 @@ void X86CallFrameOptimization::collectCa
return;
// Now, go through the vector, and see that we don't have any gaps,
- // but only a series of 32-bit MOVs.
+ // but only a series of MOVs.
auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end();
- for (; MMI != MME; ++MMI, Context.ExpectedDist += 4)
+ for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
if (*MMI == nullptr)
break;
@@ -451,12 +460,16 @@ bool X86CallFrameOptimization::adjustCal
// Now, iterate through the vector in reverse order, and replace the movs
// with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
// replace uses.
- for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
+ for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
MachineBasicBlock::iterator Push = nullptr;
- if (MOV->getOpcode() == X86::MOV32mi) {
- unsigned PushOpcode = X86::PUSHi32;
+ unsigned PushOpcode;
+ switch (MOV->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected Opcode!");
+ case X86::MOV32mi:
+ PushOpcode = X86::PUSHi32;
// If the operand is a small (8-bit) immediate, we can use a
// PUSH instruction with a shorter encoding.
// Note that isImm() may fail even though this is a MOVmi, because
@@ -468,7 +481,8 @@ bool X86CallFrameOptimization::adjustCal
}
Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
.addOperand(PushOp);
- } else {
+ break;
+ case X86::MOV32mr:
unsigned int Reg = PushOp.getReg();
// If PUSHrmm is not slow on this target, try to fold the source of the
@@ -479,7 +493,8 @@ bool X86CallFrameOptimization::adjustCal
// conservative about that.
MachineInstr *DefMov = nullptr;
if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
- Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
+ PushOpcode = X86::PUSH32rmm;
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode));
unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -487,18 +502,21 @@ bool X86CallFrameOptimization::adjustCal
DefMov->eraseFromParent();
} else {
- Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+ PushOpcode = X86::PUSH32r;
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
.addReg(Reg)
.getInstr();
}
+ break;
}
// For debugging, when using SP-based CFA, we need to adjust the CFA
// offset after each push.
// TODO: This is needed only if we require precise CFA.
if (!TFL->hasFP(MF))
- TFL->BuildCFI(MBB, std::next(Push), DL,
- MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
+ TFL->BuildCFI(
+ MBB, std::next(Push), DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize));
MBB.erase(MOV);
}
More information about the llvm-commits mailing list