<div dir="ltr">This seems to be causing some miscompiles. Working on a test case.</div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Mar 2, 2017 at 6:39 PM, Nemanja Ivanovic via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: nemanjai<br>
Date: Thu Mar 2 11:38:59 2017<br>
New Revision: 296771<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=296771&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=296771&view=rev</a><br>
Log:<br>
[PowerPC][ELFv2ABI] Allocate parameter area on-demand to reduce stack frame size<br>
<br>
This patch reduces the stack frame size by not allocating the parameter area if<br>
it is not required. In the current implementation LowerFormalArguments_64SVR4<br>
already handles the parameter area, but LowerCall_64SVR4 does not<br>
(when calculating the stack frame size). What this patch does is make<br>
LowerCall_64SVR4 consistent with LowerFormalArguments_64SVR4.<br>
<br>
Committing on behalf of Hiroshi Inoue.<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D29881" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D29881</a><br>
<br>
Added:<br>
llvm/trunk/test/CodeGen/<wbr>PowerPC/stacksize.ll<br>
Modified:<br>
llvm/trunk/lib/Target/PowerPC/<wbr>PPCISelLowering.cpp<br>
<br>
Modified: llvm/trunk/lib/Target/PowerPC/<wbr>PPCISelLowering.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=296771&r1=296770&r2=296771&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>PowerPC/PPCISelLowering.cpp?<wbr>rev=296771&r1=296770&r2=<wbr>296771&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/PowerPC/<wbr>PPCISelLowering.cpp (original)<br>
+++ llvm/trunk/lib/Target/PowerPC/<wbr>PPCISelLowering.cpp Thu Mar 2 11:38:59 2017<br>
@@ -5147,10 +5147,30 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
};<br>
<br>
const unsigned NumGPRs = array_lengthof(GPR);<br>
- const unsigned NumFPRs = 13;<br>
+ const unsigned NumFPRs = useSoftFloat() ? 0 : 13;<br>
const unsigned NumVRs = array_lengthof(VR);<br>
const unsigned NumQFPRs = NumFPRs;<br>
<br>
+ // On ELFv2, we can avoid allocating the parameter area if all the arguments<br>
+ // can be passed to the callee in registers.<br>
+ // For the fast calling convention, there is another check below.<br>
+ // Note: keep consistent with LowerFormalArguments_64SVR4()<br>
+ bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;<br>
+ if (!HasParameterArea) {<br>
+ unsigned ParamAreaSize = NumGPRs * PtrByteSize;<br>
+ unsigned AvailableFPRs = NumFPRs;<br>
+ unsigned AvailableVRs = NumVRs;<br>
+ unsigned NumBytesTmp = NumBytes;<br>
+ for (unsigned i = 0; i != NumOps; ++i) {<br>
+ if (Outs[i].Flags.isNest()) continue;<br>
+ if (CalculateStackSlotUsed(Outs[<wbr>i].VT, Outs[i].ArgVT, Outs[i].Flags,<br>
+ PtrByteSize, LinkageSize, ParamAreaSize,<br>
+ NumBytesTmp, AvailableFPRs, AvailableVRs,<br>
+ Subtarget.hasQPX()))<br>
+ HasParameterArea = true;<br>
+ }<br>
+ }<br>
+<br>
// When using the fast calling convention, we don't provide backing for<br>
// arguments that will be in registers.<br>
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;<br>
@@ -5218,13 +5238,18 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
<br>
unsigned NumBytesActuallyUsed = NumBytes;<br>
<br>
- // The prolog code of the callee may store up to 8 GPR argument registers to<br>
+ // In the old ELFv1 ABI,<br>
+ // the prolog code of the callee may store up to 8 GPR argument registers to<br>
// the stack, allowing va_start to index over them in memory if its varargs.<br>
// Because we cannot tell if this is needed on the caller side, we have to<br>
// conservatively assume that it is needed. As such, make sure we have at<br>
// least enough stack space for the caller to store the 8 GPRs.<br>
- // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.<br>
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);<br>
+ // In the ELFv2 ABI, we allocate the parameter area iff a callee<br>
+ // really requires memory operands, e.g. a vararg function.<br>
+ if (HasParameterArea)<br>
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);<br>
+ else<br>
+ NumBytes = LinkageSize;<br>
<br>
// Tail call needs the stack to be aligned.<br>
if (getTargetMachine().Options.<wbr>GuaranteedTailCallOpt &&<br>
@@ -5443,6 +5468,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
if (CallConv == CallingConv::Fast)<br>
ComputePtrOff();<br>
<br>
+ assert(HasParameterArea &&<br>
+ "Parameter area must exist to pass an argument in memory.");<br>
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,<br>
true, isTailCall, false, MemOpChains,<br>
TailCallArguments, dl);<br>
@@ -5528,6 +5555,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);<br>
}<br>
<br>
+ assert(HasParameterArea &&<br>
+ "Parameter area must exist to pass an argument in memory.");<br>
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,<br>
true, isTailCall, false, MemOpChains,<br>
TailCallArguments, dl);<br>
@@ -5562,6 +5591,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
// GPRs when within range. For now, we always put the value in both<br>
// locations (or even all three).<br>
if (isVarArg) {<br>
+ assert(HasParameterArea &&<br>
+ "Parameter area must exist if we have a varargs call.");<br>
// We could elide this store in the case where the object fits<br>
// entirely in R registers. Maybe later.<br>
SDValue Store =<br>
@@ -5594,6 +5625,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
if (CallConv == CallingConv::Fast)<br>
ComputePtrOff();<br>
<br>
+ assert(HasParameterArea &&<br>
+ "Parameter area must exist to pass an argument in memory.");<br>
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,<br>
true, isTailCall, true, MemOpChains,<br>
TailCallArguments, dl);<br>
@@ -5614,6 +5647,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
case MVT::v4i1: {<br>
bool IsF32 = Arg.getValueType().<wbr>getSimpleVT().SimpleTy == MVT::v4f32;<br>
if (isVarArg) {<br>
+ assert(HasParameterArea &&<br>
+ "Parameter area must exist if we have a varargs call.");<br>
// We could elide this store in the case where the object fits<br>
// entirely in R registers. Maybe later.<br>
SDValue Store =<br>
@@ -5646,6 +5681,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
if (CallConv == CallingConv::Fast)<br>
ComputePtrOff();<br>
<br>
+ assert(HasParameterArea &&<br>
+ "Parameter area must exist to pass an argument in memory.");<br>
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,<br>
true, isTailCall, true, MemOpChains,<br>
TailCallArguments, dl);<br>
@@ -5660,7 +5697,8 @@ SDValue PPCTargetLowering::LowerCall_<wbr>64S<br>
}<br>
}<br>
<br>
- assert(NumBytesActuallyUsed == ArgOffset);<br>
+ assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&<br>
+ "mismatch in size of parameter area");<br>
(void)NumBytesActuallyUsed;<br>
<br>
if (!MemOpChains.empty())<br>
<br>
Added: llvm/trunk/test/CodeGen/<wbr>PowerPC/stacksize.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/stacksize.ll?rev=296771&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/PowerPC/stacksize.ll?<wbr>rev=296771&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>PowerPC/stacksize.ll (added)<br>
+++ llvm/trunk/test/CodeGen/<wbr>PowerPC/stacksize.ll Thu Mar 2 11:38:59 2017<br>
@@ -0,0 +1,86 @@<br>
+; For ELFv2 ABI, we can avoid allocating the parameter area in the stack frame of the caller function<br>
+; if all the arguments can be passed to the callee in registers.<br>
+; For ELFv1 ABI, we always need to allocate the parameter area.<br>
+<br>
+; Tests for ELFv2 ABI<br>
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-<wbr>linux-gnu -target-abi elfv2 < %s | FileCheck %s -check-prefix=PPC64-ELFV2<br>
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-<wbr>linux-gnu -target-abi elfv2 < %s | FileCheck %s -check-prefix=PPC64-ELFV2<br>
+<br>
+; Tests for ELFv1 ABI<br>
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-<wbr>linux-gnu -target-abi elfv1 < %s | FileCheck %s -check-prefix=PPC64-ELFV1<br>
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-<wbr>linux-gnu -target-abi elfv1 < %s | FileCheck %s -check-prefix=PPC64-ELFV1<br>
+<br>
+; If the callee has at most eight integer args, parameter area can be ommited for ELFv2 ABI.<br>
+<br>
+; PPC64-ELFV2-LABEL: WithoutParamArea1:<br>
+; PPC64-ELFV2-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV2: stdu 1, -32(1)<br>
+; PPC64-ELFV2: addi 1, 1, 32<br>
+; PPC64-ELFV2-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1-LABEL: WithoutParamArea1:<br>
+; PPC64-ELFV1-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1: stdu 1, -112(1)<br>
+; PPC64-ELFV1: addi 1, 1, 112<br>
+; PPC64-ELFV1-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+define signext i32 @WithoutParamArea1(i32 signext %a) local_unnamed_addr #0 {<br>
+entry:<br>
+ %call = tail call signext i32 @onearg(i32 signext %a) #2<br>
+ ret i32 %call<br>
+}<br>
+<br>
+; PPC64-ELFV2-LABEL: WithoutParamArea2:<br>
+; PPC64-ELFV2-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV2: stdu 1, -32(1)<br>
+; PPC64-ELFV2: addi 1, 1, 32<br>
+; PPC64-ELFV2-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1-LABEL: WithoutParamArea2:<br>
+; PPC64-ELFV1-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1: stdu 1, -112(1)<br>
+; PPC64-ELFV1: addi 1, 1, 112<br>
+; PPC64-ELFV1-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+define signext i32 @WithoutParamArea2(i32 signext %a) local_unnamed_addr #0 {<br>
+entry:<br>
+ %call = tail call signext i32 @eightargs(i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a) #2<br>
+ ret i32 %call<br>
+}<br>
+<br>
+; If the callee has more than eight integer args or variable number of args,<br>
+; parameter area cannot be ommited even for ELFv2 ABI<br>
+<br>
+; PPC64-ELFV2-LABEL: WithParamArea1:<br>
+; PPC64-ELFV2-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV2: stdu 1, -96(1)<br>
+; PPC64-ELFV2: addi 1, 1, 96<br>
+; PPC64-ELFV2-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1-LABEL: WithParamArea1:<br>
+; PPC64-ELFV1-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1: stdu 1, -112(1)<br>
+; PPC64-ELFV1: addi 1, 1, 112<br>
+; PPC64-ELFV1-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+define signext i32 @WithParamArea1(i32 signext %a) local_unnamed_addr #0 {<br>
+entry:<br>
+ %call = tail call signext i32 (i32, ...) @varargs(i32 signext %a, i32 signext %a) #2<br>
+ ret i32 %call<br>
+}<br>
+<br>
+; PPC64-ELFV2-LABEL: WithParamArea2:<br>
+; PPC64-ELFV2-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV2: stdu 1, -112(1)<br>
+; PPC64-ELFV2: addi 1, 1, 112<br>
+; PPC64-ELFV2-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1-LABEL: WithParamArea2:<br>
+; PPC64-ELFV1-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+; PPC64-ELFV1: stdu 1, -128(1)<br>
+; PPC64-ELFV1: addi 1, 1, 128<br>
+; PPC64-ELFV1-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)<br>
+define signext i32 @WithParamArea2(i32 signext %a) local_unnamed_addr #0 {<br>
+entry:<br>
+ %call = tail call signext i32 @nineargs(i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a, i32 signext %a) #2<br>
+ ret i32 %call<br>
+}<br>
+<br>
+declare signext i32 @onearg(i32 signext) local_unnamed_addr #1<br>
+declare signext i32 @eightargs(i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) local_unnamed_addr #1<br>
+declare signext i32 @nineargs(i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) local_unnamed_addr #1<br>
+declare signext i32 @varargs(i32 signext, ...) local_unnamed_addr #1<br>
+<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>