[llvm] r362366 - [ARM][FIX] Ran out of registers due to tail recursion

Diogo N. Sampaio via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 3 01:58:05 PDT 2019


Author: dnsampaio
Date: Mon Jun  3 01:58:05 2019
New Revision: 362366

URL: http://llvm.org/viewvc/llvm-project?rev=362366&view=rev
Log:
[ARM][FIX] Ran out of registers due to tail recursion

Summary:
- pr42062
When compiling for MinSize, ARMTargetLowering::LowerCall decides to
lower multiple calls to the same function as indirect calls. However,
it disregards the limitation that Thumb1 indirect calls require the
callee to be in a register from r0 to r3 (an LLVM limitation). If all
of those registers are taken by arguments, the compiler fails with
"error: run out of registers during register allocation".
This patch tells IsEligibleForTailCallOptimization whether we intend
to perform an indirect call, so that tail call optimization is
avoided in that case.
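
For illustration only (this snippet is not part of the commit; it is a
rough C-level rendering of the new pr42062.ll test below, and the exact
build command is an assumption), the failing pattern looks like this:

  /* Hypothetical reproducer, approximating test/CodeGen/ARM/pr42062.ll.
     Assumed build: clang --target=thumbv8m.base-arm-none-eabi -Oz -c repro.c */
  extern int bar(char *a, int b, int c, char *d);
  extern char foo;

  void food(char *a) {
    /* Three calls to the same callee in one block: under MinSize the
       backend prefers indirect calls, since BLXr has a 16-bit encoding. */
    bar(a, 8, 1, &foo);
    bar(a, 9, 0, &foo);
    /* The last call is also a tail-call candidate, but its four arguments
       occupy r0-r3, leaving no legal register to hold the callee pointer. */
    bar(a, 7, 2, &foo);
  }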

Reviewers: dmgreen, efriedma

Reviewed By: efriedma

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62683

Added:
    llvm/trunk/test/CodeGen/ARM/pr42062.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=362366&r1=362365&r2=362366&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Jun  3 01:58:05 2019
@@ -1832,29 +1832,40 @@ ARMTargetLowering::LowerCall(TargetLower
   bool isVarArg                         = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
-  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
-  bool isThisReturn   = false;
-  bool isSibCall      = false;
+  bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+  bool isThisReturn = false;
   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
+  bool PreferIndirect = false;
 
   // Disable tail calls if they're not supported.
   if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
     isTailCall = false;
 
+  if (isa<GlobalAddressSDNode>(Callee)) {
+    // If we're optimizing for minimum size and the function is called three or
+    // more times in this block, we can improve codesize by calling indirectly
+    // as BLXr has a 16-bit encoding.
+    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+    auto *BB = CLI.CS.getParent();
+    PreferIndirect =
+        Subtarget->isThumb() && Subtarget->hasMinSize() &&
+        count_if(GV->users(), [&BB](const User *U) {
+          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
+        }) > 2;
+  }
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
-                                                   Outs, OutVals, Ins, DAG);
+    isTailCall = IsEligibleForTailCallOptimization(
+        Callee, CallConv, isVarArg, isStructRet,
+        MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
+        PreferIndirect);
     if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
       report_fatal_error("failed to perform tail call elimination on a call "
                          "site marked musttail");
     // We don't support GuaranteedTailCallOpt for ARM, only automatically
     // detected sibcalls.
-    if (isTailCall) {
+    if (isTailCall)
       ++NumTailCalls;
-      isSibCall = true;
-    }
   }
 
   // Analyze operands of the call, assigning locations to each operand.
@@ -1866,14 +1877,14 @@ ARMTargetLowering::LowerCall(TargetLower
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
-  // For tail calls, memory operands are available in our caller's stack.
-  if (isSibCall)
+  if (isTailCall) {
+    // For tail calls, memory operands are available in our caller's stack.
     NumBytes = 0;
-
-  // Adjust the stack pointer for the new arguments...
-  // These operations are automatically eliminated by the prolog/epilog pass
-  if (!isSibCall)
+  } else {
+    // Adjust the stack pointer for the new arguments...
+    // These operations are automatically eliminated by the prolog/epilog pass
     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
+  }
 
   SDValue StackPtr =
       DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@@ -1995,7 +2006,7 @@ ARMTargetLowering::LowerCall(TargetLower
         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                           Ops));
       }
-    } else if (!isSibCall) {
+    } else if (!isTailCall) {
       assert(VA.isMemLoc());
 
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -2067,17 +2078,6 @@ ARMTargetLowering::LowerCall(TargetLower
           MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
     }
   } else if (isa<GlobalAddressSDNode>(Callee)) {
-    // If we're optimizing for minimum size and the function is called three or
-    // more times in this block, we can improve codesize by calling indirectly
-    // as BLXr has a 16-bit encoding.
-    auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
-    auto *BB = CLI.CS.getParent();
-    bool PreferIndirect =
-        Subtarget->isThumb() && Subtarget->hasMinSize() &&
-        count_if(GV->users(), [&BB](const User *U) {
-          return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
-        }) > 2;
-
     if (!PreferIndirect) {
       isDirect = true;
       bool isDef = GV->isStrongDefinitionForLinker();
@@ -2309,28 +2309,25 @@ bool MatchingStackOffset(SDValue Arg, un
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function.
-bool
-ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
-                                                     CallingConv::ID CalleeCC,
-                                                     bool isVarArg,
-                                                     bool isCalleeStructRet,
-                                                     bool isCallerStructRet,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                                     SelectionDAG& DAG) const {
+bool ARMTargetLowering::IsEligibleForTailCallOptimization(
+    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+    bool isCalleeStructRet, bool isCallerStructRet,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+    const bool isIndirect) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
 
   assert(Subtarget->supportsTailCall());
 
-  // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+  // Indirect tail calls cannot be optimized for Thumb1 if the args
   // to the call take up r0-r3. The reason is that there are no legal registers
   // left to hold the pointer to the function to be called.
   if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
-      !isa<GlobalAddressSDNode>(Callee.getNode()))
-      return false;
+      (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
+    return false;
 
   // Look for obvious safe cases to perform tail call optimization that do not
   // require ABI changes. This is what gcc calls sibcall.

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=362366&r1=362365&r2=362366&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Jun  3 01:58:05 2019
@@ -763,15 +763,13 @@ class VectorType;
     /// IsEligibleForTailCallOptimization - Check whether the call is eligible
     /// for tail call optimization. Targets which want to do tail call
     /// optimization should implement this function.
-    bool IsEligibleForTailCallOptimization(SDValue Callee,
-                                           CallingConv::ID CalleeCC,
-                                           bool isVarArg,
-                                           bool isCalleeStructRet,
-                                           bool isCallerStructRet,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                    const SmallVectorImpl<SDValue> &OutVals,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                           SelectionDAG& DAG) const;
+    bool IsEligibleForTailCallOptimization(
+        SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+        bool isCalleeStructRet, bool isCallerStructRet,
+        const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<SDValue> &OutVals,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
+        const bool isIndirect) const;
 
     bool CanLowerReturn(CallingConv::ID CallConv,
                         MachineFunction &MF, bool isVarArg,

Added: llvm/trunk/test/CodeGen/ARM/pr42062.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/pr42062.ll?rev=362366&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/pr42062.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/pr42062.ll Mon Jun  3 01:58:05 2019
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - %s 2>&1 | FileCheck %s --implicit-check-not=error
+target triple = "thumbv8m.base-arm-none-eabi"
+ at foo = external global i8
+declare i32 @bar(i8* nocapture, i32, i32, i8* nocapture)
+
+define void @food(i8* %a) #0 {
+; CHECK-LABEL: food:
+; CHECK:    mov [[ARG0:r[4-7]]], r0
+; CHECK-NEXT:    movs r1, #8
+; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:    ldr [[FOO_R:r[4-7]]], [[FOO_ADDR:\..*]]
+; CHECK-NEXT:    ldr [[BAR_R:r[4-7]]], [[BAR_ADDR:\..*]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    movs r1, #9
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    mov r0, [[ARG0]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    movs r1, #7
+; CHECK-NEXT:    movs r2, #2
+; CHECK-NEXT:    mov r0, [[ARG0]]
+; CHECK-NEXT:    mov r3, [[FOO_R]]
+; CHECK-NEXT:    blx [[BAR_R]]
+; CHECK-NEXT:    pop {r4, r5, r6, pc}
+; CHECK:         [[FOO_ADDR]]:
+; CHECK-NEXT:    .long foo
+; CHECK:         [[BAR_ADDR]]:
+; CHECK-NEXT:    .long bar
+entry:
+  %0 = tail call i32 @bar(i8* %a, i32 8, i32 1, i8* nonnull @foo)
+  %1 = tail call i32 @bar(i8* %a, i32 9, i32 0, i8* nonnull @foo)
+  %2 = tail call i32 @bar(i8* %a, i32 7, i32 2, i8* nonnull @foo)
+  ret void
+}
+attributes #0 = { minsize "target-cpu"="cortex-m23" }
+



