[llvm] r372897 - [AArch64][GlobalISel] Choose CCAssignFns per-argument for tail call lowering

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 09:45:35 PDT 2019


Author: paquette
Date: Wed Sep 25 09:45:35 2019
New Revision: 372897

URL: http://llvm.org/viewvc/llvm-project?rev=372897&view=rev
Log:
[AArch64][GlobalISel] Choose CCAssignFns per-argument for tail call lowering

When checking for tail call eligibility, we should use the correct CCAssignFn
for each argument, rather than just checking if the caller/callee is varargs or
not.

This is important for tail call lowering with varargs. If we don't check it,
then basically any varargs callee with parameters cannot be tail called on
Darwin, for one thing. If the parameters are all guaranteed to be in registers,
this should be entirely safe.

On top of that, not checking for this could potentially make it so that we have
the wrong stack offsets when checking for tail call eligibility.

Also refactor some of the stuff for CCAssignFnForCall and pull it out into a
helper function.

Update call-translator-tail-call.ll to show that we can now correctly tail call
on Darwin. Also add two extra tail call checks. The first verifies that we still
respect the caller's stack size, and the second verifies that we still don't
tail call when a varargs function has a memory argument.

Differential Revision: https://reviews.llvm.org/D67939

Modified:
    llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
    llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll

Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h?rev=372897&r1=372896&r2=372897&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h Wed Sep 25 09:45:35 2019
@@ -220,7 +220,8 @@ protected:
   ///
   /// Used to check if arguments are suitable for tail call lowering.
   bool analyzeArgInfo(CCState &CCState, SmallVectorImpl<ArgInfo> &Args,
-                      CCAssignFn &Fn) const;
+                      CCAssignFn &AssignFnFixed,
+                      CCAssignFn &AssignFnVarArg) const;
 
   /// \returns True if the calling convention for a callee and its caller pass
   /// results in the same way. Typically used for tail call eligibility checks.
@@ -228,12 +229,18 @@ protected:
   /// \p Info is the CallLoweringInfo for the call.
   /// \p MF is the MachineFunction for the caller.
   /// \p InArgs contains the results of the call.
-  /// \p CalleeAssignFn is the CCAssignFn to be used for the callee.
-  /// \p CallerAssignFn is the CCAssignFn to be used for the caller.
+  /// \p CalleeAssignFnFixed is the CCAssignFn to be used for the callee for
+  /// fixed arguments.
+  /// \p CalleeAssignFnVarArg is similar, but for varargs.
+  /// \p CallerAssignFnFixed is the CCAssignFn to be used for the caller for
+  /// fixed arguments.
+  /// \p CallerAssignFnVarArg is similar, but for varargs.
   bool resultsCompatible(CallLoweringInfo &Info, MachineFunction &MF,
                          SmallVectorImpl<ArgInfo> &InArgs,
-                         CCAssignFn &CalleeAssignFn,
-                         CCAssignFn &CallerAssignFn) const;
+                         CCAssignFn &CalleeAssignFnFixed,
+                         CCAssignFn &CalleeAssignFnVarArg,
+                         CCAssignFn &CallerAssignFnFixed,
+                         CCAssignFn &CallerAssignFnVarArg) const;
 
 public:
   CallLowering(const TargetLowering *TLI) : TLI(TLI) {}

Modified: llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp?rev=372897&r1=372896&r2=372897&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp Wed Sep 25 09:45:35 2019
@@ -379,10 +379,12 @@ bool CallLowering::handleAssignments(CCS
 }
 
 bool CallLowering::analyzeArgInfo(CCState &CCState,
-                                     SmallVectorImpl<ArgInfo> &Args,
-                                     CCAssignFn &Fn) const {
+                                  SmallVectorImpl<ArgInfo> &Args,
+                                  CCAssignFn &AssignFnFixed,
+                                  CCAssignFn &AssignFnVarArg) const {
   for (unsigned i = 0, e = Args.size(); i < e; ++i) {
     MVT VT = MVT::getVT(Args[i].Ty);
+    CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg;
     if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) {
       // Bail out on anything we can't handle.
       LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString()
@@ -396,8 +398,10 @@ bool CallLowering::analyzeArgInfo(CCStat
 bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
                                      MachineFunction &MF,
                                      SmallVectorImpl<ArgInfo> &InArgs,
-                                     CCAssignFn &CalleeAssignFn,
-                                     CCAssignFn &CallerAssignFn) const {
+                                     CCAssignFn &CalleeAssignFnFixed,
+                                     CCAssignFn &CalleeAssignFnVarArg,
+                                     CCAssignFn &CallerAssignFnFixed,
+                                     CCAssignFn &CallerAssignFnVarArg) const {
   const Function &F = MF.getFunction();
   CallingConv::ID CalleeCC = Info.CallConv;
   CallingConv::ID CallerCC = F.getCallingConv();
@@ -407,12 +411,14 @@ bool CallLowering::resultsCompatible(Cal
 
   SmallVector<CCValAssign, 16> ArgLocs1;
   CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
-  if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFn))
+  if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed,
+                      CalleeAssignFnVarArg))
     return false;
 
   SmallVector<CCValAssign, 16> ArgLocs2;
   CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
-  if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFn))
+  if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed,
+                      CalleeAssignFnVarArg))
     return false;
 
   // We need the argument locations to match up exactly. If there's more in

Modified: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp?rev=372897&r1=372896&r2=372897&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp Wed Sep 25 09:45:35 2019
@@ -464,6 +464,13 @@ static bool mayTailCallThisCC(CallingCon
   }
 }
 
+/// Returns a pair containing the fixed CCAssignFn and the vararg CCAssignFn for
+/// CC.
+static std::pair<CCAssignFn *, CCAssignFn *>
+getAssignFnsForCC(CallingConv::ID CC, const AArch64TargetLowering &TLI) {
+  return {TLI.CCAssignFnForCall(CC, false), TLI.CCAssignFnForCall(CC, true)};
+}
+
 bool AArch64CallLowering::doCallerAndCalleePassArgsTheSameWay(
     CallLoweringInfo &Info, MachineFunction &MF,
     SmallVectorImpl<ArgInfo> &InArgs) const {
@@ -477,11 +484,19 @@ bool AArch64CallLowering::doCallerAndCal
 
   // Check if the caller and callee will handle arguments in the same way.
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
-  CCAssignFn *CalleeAssignFn = TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg);
-  CCAssignFn *CallerAssignFn =
-      TLI.CCAssignFnForCall(CallerCC, CallerF.isVarArg());
-
-  if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFn, *CallerAssignFn))
+  CCAssignFn *CalleeAssignFnFixed;
+  CCAssignFn *CalleeAssignFnVarArg;
+  std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) =
+      getAssignFnsForCC(CalleeCC, TLI);
+
+  CCAssignFn *CallerAssignFnFixed;
+  CCAssignFn *CallerAssignFnVarArg;
+  std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) =
+      getAssignFnsForCC(CallerCC, TLI);
+
+  if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed,
+                         *CalleeAssignFnVarArg, *CallerAssignFnFixed,
+                         *CallerAssignFnVarArg))
     return false;
 
   // Make sure that the caller and callee preserve all of the same registers.
@@ -508,12 +523,15 @@ bool AArch64CallLowering::areCalleeOutgo
   CallingConv::ID CallerCC = CallerF.getCallingConv();
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 
+  CCAssignFn *AssignFnFixed;
+  CCAssignFn *AssignFnVarArg;
+  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
+
   // We have outgoing arguments. Make sure that we can tail call with them.
   SmallVector<CCValAssign, 16> OutLocs;
   CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext());
 
-  if (!analyzeArgInfo(OutInfo, OutArgs,
-                      *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg))) {
+  if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) {
     LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n");
     return false;
   }
@@ -741,10 +759,9 @@ bool AArch64CallLowering::lowerTailCall(
 
   // Find out which ABI gets to decide where things go.
   CallingConv::ID CalleeCC = Info.CallConv;
-  CCAssignFn *AssignFnFixed =
-      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
-  CCAssignFn *AssignFnVarArg =
-      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);
+  CCAssignFn *AssignFnFixed;
+  CCAssignFn *AssignFnVarArg;
+  std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI);
 
   MachineInstrBuilder CallSeqStart;
   if (!IsSibCall)
@@ -787,8 +804,7 @@ bool AArch64CallLowering::lowerTailCall(
     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
     SmallVector<CCValAssign, 16> OutLocs;
     CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
-    analyzeArgInfo(OutInfo, OutArgs,
-                   *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
+    analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg);
 
     // The callee will pop the argument stack as a tail call. Thus, we must
     // keep it 16-byte aligned.
@@ -879,10 +895,10 @@ bool AArch64CallLowering::lowerCall(Mach
     return lowerTailCall(MIRBuilder, Info, OutArgs);
 
   // Find out which ABI gets to decide where things go.
-  CCAssignFn *AssignFnFixed =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
-  CCAssignFn *AssignFnVarArg =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
+  CCAssignFn *AssignFnFixed;
+  CCAssignFn *AssignFnVarArg;
+  std::tie(AssignFnFixed, AssignFnVarArg) =
+      getAssignFnsForCC(Info.CallConv, TLI);
 
   MachineInstrBuilder CallSeqStart;
   CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);

Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll?rev=372897&r1=372896&r2=372897&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll Wed Sep 25 09:45:35 2019
@@ -77,32 +77,77 @@ define i32 @test_nonvoid_ret() {
 
 declare void @varargs(i32, double, i64, ...)
 define void @test_varargs() {
-  ; On Darwin, everything is passed on the stack. Since the caller has no stack,
-  ; we don't tail call.
-  ; DARWIN-LABEL: name: test_varargs
-  ; DARWIN: bb.1 (%ir-block.0):
-  ; DARWIN:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
-  ; DARWIN:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
-  ; DARWIN:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
-  ; DARWIN:   $w0 = COPY [[C]](s32)
-  ; DARWIN:   $d0 = COPY [[C1]](s64)
-  ; DARWIN:   $x1 = COPY [[C2]](s64)
+  ; COMMON-LABEL: name: test_varargs
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; COMMON:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; COMMON:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; COMMON:   $w0 = COPY [[C]](s32)
+  ; COMMON:   $d0 = COPY [[C1]](s64)
+  ; COMMON:   $x1 = COPY [[C2]](s64)
+  ; COMMON:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+  ret void
+}
+
+; Darwin should not tail call here, because the last parameter to @varargs is
+; not fixed. So, it's passed on the stack, which will make us not fit. On
+; Windows, it's passed in a register, so it's safe to tail call.
+define void @test_varargs_2() {
+  ; DARWIN-LABEL: name: test_varargs_2
+  ; DARWIN-NOT: TCRETURNdi @varargs
   ; DARWIN:   BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
-  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
   ; DARWIN:   RET_ReallyLR
 
-  ; Windows uses registers, so we don't need to worry about using the stack.
-  ; WINDOWS-LABEL: name: test_varargs
+  ; WINDOWS-LABEL: name: test_varargs_2
   ; WINDOWS: bb.1 (%ir-block.0):
   ; WINDOWS:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
   ; WINDOWS:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
   ; WINDOWS:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; WINDOWS:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
   ; WINDOWS:   $w0 = COPY [[C]](s32)
   ; WINDOWS:   $d0 = COPY [[C1]](s64)
   ; WINDOWS:   $x1 = COPY [[C2]](s64)
-  ; WINDOWS:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1
-  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+  ; WINDOWS:   $x2 = COPY [[C3]](s64)
+  ; WINDOWS:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
+  ret void
+}
+
+; Same deal here, even though we have enough room to fit. On Darwin, we'll pass
+; the last argument to @varargs on the stack. We don't allow tail calling
+; varargs arguments that are on the stack.
+define void @test_varargs_3([8 x <2 x double>], <4 x half> %arg) {
+  ; DARWIN-LABEL: name: test_varargs_3
+  ; DARWIN-NOT: TCRETURNdi @varargs
+  ; DARWIN:   BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; DARWIN:   ADJCALLSTACKUP 8, 0, implicit-def $sp, implicit $sp
+  ; DARWIN:   RET_ReallyLR
+
+  ; WINDOWS-LABEL: name: test_varargs_3
+  ; WINDOWS: bb.1 (%ir-block.1):
+  ; WINDOWS:   liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7
+  ; WINDOWS:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; WINDOWS:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+  ; WINDOWS:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
+  ; WINDOWS:   [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
+  ; WINDOWS:   [[COPY4:%[0-9]+]]:_(<2 x s64>) = COPY $q4
+  ; WINDOWS:   [[COPY5:%[0-9]+]]:_(<2 x s64>) = COPY $q5
+  ; WINDOWS:   [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY $q6
+  ; WINDOWS:   [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY $q7
+  ; WINDOWS:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; WINDOWS:   [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+  ; WINDOWS:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; WINDOWS:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; WINDOWS:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; WINDOWS:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314
+  ; WINDOWS:   $w0 = COPY [[C]](s32)
+  ; WINDOWS:   $d0 = COPY [[C1]](s64)
+  ; WINDOWS:   $x1 = COPY [[C2]](s64)
+  ; WINDOWS:   $x2 = COPY [[C3]](s64)
+  ; WINDOWS:   TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314)
   ret void
 }
 




More information about the llvm-commits mailing list