[llvm] r372177 - [AArch64][GlobalISel] Support -tailcallopt
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 13:24:23 PDT 2019
Author: paquette
Date: Tue Sep 17 13:24:23 2019
New Revision: 372177
URL: http://llvm.org/viewvc/llvm-project?rev=372177&view=rev
Log:
[AArch64][GlobalISel] Support -tailcallopt
This adds support for `-tailcallopt` tail calls to CallLowering. It
piggy-backs off the changes from D67577, since doing this without a bit of
refactoring gets extremely ugly.
Support is basically ported from AArch64ISelLowering. The main difference here
is that tail calls under `-tailcallopt` change the ABI, so there's some extra
bookkeeping for the stack.
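To make the ABI point concrete, here is a minimal IR sketch (hypothetical
caller, following the declarations in tail-call.ll): under `-tailcallopt`, a
matching fastcc tail call is guaranteed to be emitted as a tail call, and the
callee pops its own argument stack area on return.

  declare fastcc void @callee_stack0()

  define fastcc void @caller() {
    ; Guaranteed to be lowered as a tail call under -tailcallopt; the
    ; callee, not the caller, restores any argument stack it was passed.
    tail call fastcc void @callee_stack0()
    ret void
  }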
Show that we lower these correctly by updating tail-call.ll.
Also show that we don't do anything strange in general by updating
fastcc-reserved.ll, which passes `-tailcallopt` but doesn't emit any tail
calls.
Differential Revision: https://reviews.llvm.org/D67580
Modified:
llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
llvm/trunk/test/CodeGen/AArch64/tail-call.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp?rev=372177&r1=372176&r2=372177&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp Tue Sep 17 13:24:23 2019
@@ -130,9 +130,11 @@ struct CallReturnHandler : public Incomi
struct OutgoingArgHandler : public CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn,
- CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
+ CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
+ int FPDiff = 0)
: ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
- AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
+ AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
+ StackSize(0) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -141,8 +143,7 @@ struct OutgoingArgHandler : public CallL
LLT s64 = LLT::scalar(64);
if (IsTailCall) {
- // TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
- // ISelLowering.
+ Offset += FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
Register FIReg = MRI.createGenericVirtualRegister(p0);
MIRBuilder.buildFrameIndex(FIReg, FI);
@@ -201,10 +202,18 @@ struct OutgoingArgHandler : public CallL
MachineInstrBuilder MIB;
CCAssignFn *AssignFnVarArg;
bool IsTailCall;
+
+ /// For tail calls, the byte offset of the call's argument area from the
+ /// callee's. Unused elsewhere.
+ int FPDiff;
uint64_t StackSize;
};
} // namespace
+static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+ return CallConv == CallingConv::Fast && TailCallOpt;
+}
+
void AArch64CallLowering::splitToValueTypes(
const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
@@ -408,9 +417,21 @@ bool AArch64CallLowering::lowerFormalArg
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
- // TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
- // We need to keep track of the size of function stacks for tail call
- // optimization. When we tail call, we need to check if the callee's arguments
+ if (doesCalleeRestoreStack(F.getCallingConv(),
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
+ // We have a non-standard ABI, so why not make full use of the stack that
+ // we're going to pop? It must be aligned to 16 B in any case.
+ StackOffset = alignTo(StackOffset, 16);
+
+ // If we're expected to restore the stack (e.g. fastcc), then we'll be
+ // adding a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackOffset);
+
+ // Our own callers will guarantee that the space is free by giving an
+ // aligned value to CALLSEQ_START.
+ }
+
+ // When we tail call, we need to check if the callee's arguments
// will fit on the caller's stack. So, whenever we lower formal arguments,
// we should keep track of this information, since we might lower a tail call
// in this function later.
@@ -639,9 +660,12 @@ bool AArch64CallLowering::isEligibleForT
}
}
- // If we have -tailcallopt and matching CCs, at this point, we could return
- // true. However, we don't have full tail call support yet. So, continue
- // checking. We want to emit a sibling call.
+ // If we have -tailcallopt, then we're done.
+ if (MF.getTarget().Options.GuaranteedTailCallOpt)
+ return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
+
+ // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
+ // Try to find cases where we can do that.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
@@ -695,6 +719,9 @@ bool AArch64CallLowering::lowerTailCall(
MachineRegisterInfo &MRI = MF.getRegInfo();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ // True when we're tail calling, but without -tailcallopt.
+ bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
+
// TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
// register class. Until we can do that, we should fall back here.
if (F.hasFnAttribute("branch-target-enforcement")) {
@@ -704,18 +731,22 @@ bool AArch64CallLowering::lowerTailCall(
}
// Find out which ABI gets to decide where things go.
+ CallingConv::ID CalleeCC = Info.CallConv;
CCAssignFn *AssignFnFixed =
- TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
+ TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
CCAssignFn *AssignFnVarArg =
- TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
+ TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);
+
+ MachineInstrBuilder CallSeqStart;
+ if (!IsSibCall)
+ CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.add(Info.Callee);
- // Add the byte offset for the tail call. We only have sibling calls, so this
- // is always 0.
- // TODO: Handle tail calls where we will have a different value here.
+ // Byte offset for the tail call. When we are sibcalling, this will always
+ // be 0.
MIB.addImm(0);
// Tell the call which registers are clobbered.
@@ -728,13 +759,64 @@ bool AArch64CallLowering::lowerTailCall(
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0.
+ int FPDiff = 0;
+
+ // This will be 0 for sibcalls, potentially nonzero for tail calls produced
+ // by -tailcallopt. For sibcalls, the memory operands for the call are
+ // already available in the caller's incoming argument space.
+ unsigned NumBytes = 0;
+ if (!IsSibCall) {
+ // We aren't sibcalling, so we need to compute FPDiff. We need to do this
+ // before handling assignments, because FPDiff must be known for memory
+ // arguments.
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+ SmallVector<CCValAssign, 16> OutLocs;
+ CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
+ analyzeArgInfo(OutInfo, OutArgs,
+ *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
+
+ // The callee will pop the argument stack as a tail call. Thus, we must
+ // keep it 16-byte aligned.
+ NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
+
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
+
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+ }
+
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
- AssignFnVarArg, true);
+ AssignFnVarArg, true, FPDiff);
if (!handleAssignments(MIRBuilder, OutArgs, Handler))
return false;
+ // If we have -tailcallopt, we need to adjust the stack. We'll do the call
+ // sequence start and end here.
+ if (!IsSibCall) {
+ MIB->getOperand(1).setImm(FPDiff);
+ CallSeqStart.addImm(NumBytes).addImm(0);
+ // End the call sequence *before* emitting the call. Normally, we would
+ // tidy the frame up after the call. However, here, we've laid out the
+ // parameters so that when SP is reset, they will be in the correct
+ // location.
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
+ }
+
// Now we can add the actual call instruction to the correct basic block.
MIRBuilder.insertInstr(MIB);
@@ -765,12 +847,6 @@ bool AArch64CallLowering::lowerCall(Mach
return false;
}
- if (Info.IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
- // TODO: Until we lower all tail calls, we should fall back on this.
- LLVM_DEBUG(dbgs() << "Cannot handle -tailcallopt yet.\n");
- return false;
- }
-
SmallVector<ArgInfo, 8> OutArgs;
for (auto &OrigArg : Info.OrigArgs) {
splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
@@ -848,10 +924,16 @@ bool AArch64CallLowering::lowerCall(Mach
MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
}
+ uint64_t CalleePopBytes =
+ doesCalleeRestoreStack(Info.CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt)
+ ? alignTo(Handler.StackSize, 16)
+ : 0;
+
CallSeqStart.addImm(Handler.StackSize).addImm(0);
MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
.addImm(Handler.StackSize)
- .addImm(0);
+ .addImm(CalleePopBytes);
return true;
}
Modified: llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll?rev=372177&r1=372176&r2=372177&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll Tue Sep 17 13:24:23 2019
@@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
; This test is designed to be run in the situation where the
; call-frame is not reserved (hence disable-fp-elim), but where
Modified: llvm/trunk/test/CodeGen/AArch64/tail-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/tail-call.ll?rev=372177&r1=372176&r2=372177&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/tail-call.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/tail-call.ll Tue Sep 17 13:24:23 2019
@@ -1,5 +1,5 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=SDAG,COMMON
-; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
declare fastcc void @callee_stack0()
declare fastcc void @callee_stack8([8 x i64], i64)
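To see where FPDiff comes in, consider a hedged sketch (hypothetical caller,
reusing the callee_stack8 declaration above): the caller has no incoming stack
arguments, so NumReusableBytes is 0, while the callee needs 16 bytes of
argument stack (8 for the trailing i64, aligned up to 16). FPDiff is then -16,
and the ADJCALLSTACKDOWN/ADJCALLSTACKUP pair emitted for the non-sibling tail
call grows the stack before the jump.

  define fastcc void @grow_stack([8 x i64] %in) {
    ; [8 x i64] fills x0-x7, so the trailing i64 must go on the stack.
    ; With -tailcallopt this is still a guaranteed tail call; FPDiff
    ; accounts for the 16 bytes the callee expects and will pop.
    tail call fastcc void @callee_stack8([8 x i64] %in, i64 42)
    ret void
  }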