[llvm] r372177 - [AArch64][GlobalISel] Support -tailcallopt

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 13:24:23 PDT 2019


Author: paquette
Date: Tue Sep 17 13:24:23 2019
New Revision: 372177

URL: http://llvm.org/viewvc/llvm-project?rev=372177&view=rev
Log:
[AArch64][GlobalISel] Support -tailcallopt

This adds support for `-tailcallopt` tail calls to CallLowering. This
piggybacks on the changes from D67577, since doing it without a bit of
refactoring gets extremely ugly.

Support is basically ported from AArch64ISelLowering. The main difference here
is that tail calls in `-tailcallopt` change the ABI, so there's some extra
bookkeeping for the stack.

Show that we are correctly lowering these by updating tail-call.ll.

Also show that we don't do anything strange in general by updating
fastcc-reserved.ll, which passes `-tailcallopt` but doesn't emit any tail
calls.

Differential Revision: https://reviews.llvm.org/D67580
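
For illustration, here is a minimal example (a sketch in the spirit of
tail-call.ll, not copied from it) of the kind of call this change lowers: a
fastcc caller tail calling a fastcc callee that takes a stack argument,
compiled with `-tailcallopt`, so the callee pops its own argument area.

  declare fastcc void @callee_stack8([8 x i64], i64)

  define fastcc void @caller() {
    ; [8 x i64] occupies x0-x7, so the trailing i64 is passed on the stack.
    ; With -tailcallopt and matching fastcc conventions this becomes a
    ; guaranteed tail call, and the callee deallocates the stack argument.
    tail call fastcc void @callee_stack8([8 x i64] undef, i64 42)
    ret void
  }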

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
    llvm/trunk/test/CodeGen/AArch64/tail-call.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp?rev=372177&r1=372176&r2=372177&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp Tue Sep 17 13:24:23 2019
@@ -130,9 +130,11 @@ struct CallReturnHandler : public Incomi
 struct OutgoingArgHandler : public CallLowering::ValueHandler {
   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
-                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
+                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
+                     int FPDiff = 0)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
-        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
+        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
+        StackSize(0) {}
 
   Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
@@ -141,8 +143,7 @@ struct OutgoingArgHandler : public CallL
     LLT s64 = LLT::scalar(64);
 
     if (IsTailCall) {
-      // TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
-      // ISelLowering.
+      Offset += FPDiff;
       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
       Register FIReg = MRI.createGenericVirtualRegister(p0);
       MIRBuilder.buildFrameIndex(FIReg, FI);
@@ -201,10 +202,18 @@ struct OutgoingArgHandler : public CallL
   MachineInstrBuilder MIB;
   CCAssignFn *AssignFnVarArg;
   bool IsTailCall;
+
+  /// For tail calls, the byte offset of the call's argument area from the
+  /// callee's. Unused elsewhere.
+  int FPDiff;
   uint64_t StackSize;
 };
 } // namespace
 
+static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+  return CallConv == CallingConv::Fast && TailCallOpt;
+}
+
 void AArch64CallLowering::splitToValueTypes(
     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
@@ -408,9 +417,21 @@ bool AArch64CallLowering::lowerFormalArg
     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
   }
 
-  // TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
-  // We need to keep track of the size of function stacks for tail call
-  // optimization. When we tail call, we need to check if the callee's arguments
+  if (doesCalleeRestoreStack(F.getCallingConv(),
+                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
+    // We have a non-standard ABI, so why not make full use of the stack that
+    // we're going to pop? It must be aligned to 16 B in any case.
+    StackOffset = alignTo(StackOffset, 16);
+
+    // If we're expected to restore the stack (e.g. fastcc), then we'll be
+    // adding a multiple of 16.
+    FuncInfo->setArgumentStackToRestore(StackOffset);
+
+    // Our own callers will guarantee that the space is free by giving an
+    // aligned value to CALLSEQ_START.
+  }
+
+  // When we tail call, we need to check if the callee's arguments
   // will fit on the caller's stack. So, whenever we lower formal arguments,
   // we should keep track of this information, since we might lower a tail call
   // in this function later.
@@ -639,9 +660,12 @@ bool AArch64CallLowering::isEligibleForT
     }
   }
 
-  // If we have -tailcallopt and matching CCs, at this point, we could return
-  // true. However, we don't have full tail call support yet. So, continue
-  // checking. We want to emit a sibling call.
+  // If we have -tailcallopt, then we're done.
+  if (MF.getTarget().Options.GuaranteedTailCallOpt)
+    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
+
+  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
+  // Try to find cases where we can do that.
 
   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
@@ -695,6 +719,9 @@ bool AArch64CallLowering::lowerTailCall(
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 
+  // True when we're tail calling, but without -tailcallopt.
+  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
+
   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
   // register class. Until we can do that, we should fall back here.
   if (F.hasFnAttribute("branch-target-enforcement")) {
@@ -704,18 +731,22 @@ bool AArch64CallLowering::lowerTailCall(
   }
 
   // Find out which ABI gets to decide where things go.
+  CallingConv::ID CalleeCC = Info.CallConv;
   CCAssignFn *AssignFnFixed =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
+      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
   CCAssignFn *AssignFnVarArg =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
+      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);
+
+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
   unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
-  // Add the byte offset for the tail call. We only have sibling calls, so this
-  // is always 0.
-  // TODO: Handle tail calls where we will have a different value here.
+  // Byte offset for the tail call. When we are sibcalling, this will always
+  // be 0.
   MIB.addImm(0);
 
   // Tell the call which registers are clobbered.
@@ -728,13 +759,64 @@ bool AArch64CallLowering::lowerTailCall(
   if (TRI->isAnyArgRegReserved(MF))
     TRI->emitReservedArgRegCallError(MF);
 
+  // FPDiff is the byte offset of the call's argument area from the callee's.
+  // Stores to callee stack arguments will be placed in FixedStackSlots offset
+  // by this amount for a tail call. In a sibling call it must be 0 because the
+  // caller will deallocate the entire stack and the callee still expects its
+  // arguments to begin at SP+0.
+  int FPDiff = 0;
+
+  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
+  // by -tailcallopt. For sibcalls, the memory operands for the call are
+  // already available in the caller's incoming argument space.
+  unsigned NumBytes = 0;
+  if (!IsSibCall) {
+    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
+    // before handling assignments, because FPDiff must be known for memory
+    // arguments.
+    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+    SmallVector<CCValAssign, 16> OutLocs;
+    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
+    analyzeArgInfo(OutInfo, OutArgs,
+                   *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
+
+    // The callee will pop the argument stack as a tail call. Thus, we must
+    // keep it 16-byte aligned.
+    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
+
+    // FPDiff will be negative if this tail call requires more space than we
+    // would automatically have in our incoming argument space. Positive if we
+    // actually shrink the stack.
+    FPDiff = NumReusableBytes - NumBytes;
+
+    // The stack pointer must be 16-byte aligned at all times it's used for a
+    // memory operation, which in practice means at *all* times and in
+    // particular across call boundaries. Therefore our own arguments started at
+    // a 16-byte aligned SP and the delta applied for the tail call should
+    // satisfy the same constraint.
+    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+  }
+
   // Do the actual argument marshalling.
   SmallVector<unsigned, 8> PhysRegs;
   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
-                             AssignFnVarArg, true);
+                             AssignFnVarArg, true, FPDiff);
   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
     return false;
 
+  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
+  // sequence start and end here.
+  if (!IsSibCall) {
+    MIB->getOperand(1).setImm(FPDiff);
+    CallSeqStart.addImm(NumBytes).addImm(0);
+    // End the call sequence *before* emitting the call. Normally, we would
+    // tidy the frame up after the call. However, here, we've laid out the
+    // parameters so that when SP is reset, they will be in the correct
+    // location.
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
+  }
+
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);
 
@@ -765,12 +847,6 @@ bool AArch64CallLowering::lowerCall(Mach
     return false;
   }
 
-  if (Info.IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
-    // TODO: Until we lower all tail calls, we should fall back on this.
-    LLVM_DEBUG(dbgs() << "Cannot handle -tailcallopt yet.\n");
-    return false;
-  }
-
   SmallVector<ArgInfo, 8> OutArgs;
   for (auto &OrigArg : Info.OrigArgs) {
     splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
@@ -848,10 +924,16 @@ bool AArch64CallLowering::lowerCall(Mach
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }
 
+  uint64_t CalleePopBytes =
+      doesCalleeRestoreStack(Info.CallConv,
+                             MF.getTarget().Options.GuaranteedTailCallOpt)
+          ? alignTo(Handler.StackSize, 16)
+          : 0;
+
   CallSeqStart.addImm(Handler.StackSize).addImm(0);
   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
       .addImm(Handler.StackSize)
-      .addImm(0);
+      .addImm(CalleePopBytes);
 
   return true;
 }
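
As a worked example of the FPDiff bookkeeping above (numbers are illustrative,
not taken from the patch): if the caller's incoming argument area is 32 bytes
(NumReusableBytes = 32) and the callee's outgoing stack arguments need 48
bytes, then NumBytes = alignTo(48, 16) = 48 and FPDiff = 32 - 48 = -16, i.e.
the tail call needs 16 more bytes than the caller's own argument area
provides. The fixed stack objects for the outgoing arguments are then created
at Offset + FPDiff, ADJCALLSTACKDOWN/ADJCALLSTACKUP carry NumBytes = 48, and
the byte-offset immediate on the tail call is updated to FPDiff. On the
non-tail-call path, because fastcc with -tailcallopt means the callee restores
the stack, ADJCALLSTACKUP's second operand becomes alignTo(Handler.StackSize,
16) rather than 0.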

Modified: llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll?rev=372177&r1=372176&r2=372177&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fastcc-reserved.ll Tue Sep 17 13:24:23 2019
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
 
 ; This test is designed to be run in the situation where the
 ; call-frame is not reserved (hence disable-fp-elim), but where

Modified: llvm/trunk/test/CodeGen/AArch64/tail-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/tail-call.ll?rev=372177&r1=372176&r2=372177&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/tail-call.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/tail-call.ll Tue Sep 17 13:24:23 2019
@@ -1,5 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=SDAG,COMMON
-; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
 
 declare fastcc void @callee_stack0()
 declare fastcc void @callee_stack8([8 x i64], i64)
