[llvm] r318143 - [ARM] Fix incorrect conversion of a tail call to an ordinary call

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 14 02:36:52 PST 2017


Author: chill
Date: Tue Nov 14 02:36:52 2017
New Revision: 318143

URL: http://llvm.org/viewvc/llvm-project?rev=318143&view=rev
Log:
[ARM] Fix incorrect conversion of a tail call to an ordinary call

When we emit a tail call for Armv8-M, but then discover that the caller needs to
save/restore `LR`, we convert the tail call to an ordinary one, since restoring
`LR` takes extra instructions, which may negate the benefits of the tail
call. If the callee, however, takes stack arguments, this conversion is
incorrect, since nothing has been done to pass the stack arguments.

Thus the patch reverts https://reviews.llvm.org/rL294000

Also, we improve the instruction sequence for popping `LR` in the case when we
couldn't immediately find a scratch low register, but we can use as a temporary
one of the callee-saved low registers and restore `LR` before popping other
callee-saves.

Differential Revision: https://reviews.llvm.org/D39599


Modified:
    llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
    llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
    llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll
    llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll
    llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll

Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=318143&r1=318142&r2=318143&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Tue Nov 14 02:36:52 2017
@@ -221,8 +221,8 @@ void ARMSubtarget::initSubtargetFeatures
   // baseline, since the LDM/POP instruction on Thumb doesn't take LR.  This
   // means if we need to reload LR, it takes extra instructions, which outweighs
   // the value of the tail call; but here we don't know yet whether LR is going
-  // to be used. We generate the tail call here and turn it back into CALL/RET
-  // in emitEpilogue if LR is used.
+  // to be used. We take the optimistic approach of generating the tail call and
+  // perhaps taking a hit if we need to restore the LR.
 
   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
   // but we need to make sure there are enough registers; the only valid

Modified: llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp?rev=318143&r1=318142&r2=318143&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Thumb1FrameLowering.cpp Tue Nov 14 02:36:52 2017
@@ -512,6 +512,26 @@ bool Thumb1FrameLowering::needPopSpecial
   return false;
 }
 
+static void findTemporariesForLR(const BitVector &GPRsNoLRSP,
+                                 const BitVector &PopFriendly,
+                                 const LivePhysRegs &UsedRegs, unsigned &PopReg,
+                                 unsigned &TmpReg) {
+  PopReg = TmpReg = 0;
+  for (auto Reg : GPRsNoLRSP.set_bits()) {
+    if (!UsedRegs.contains(Reg)) {
+      // Remember the first pop-friendly register and exit.
+      if (PopFriendly.test(Reg)) {
+        PopReg = Reg;
+        TmpReg = 0;
+        break;
+      }
+      // Otherwise, remember that the register will be available to
+      // save a pop-friendly register.
+      TmpReg = Reg;
+    }
+  }
+}
+
 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
                                               bool DoIt) const {
   MachineFunction &MF = *MBB.getParent();
@@ -600,17 +620,19 @@ bool Thumb1FrameLowering::emitPopSpecial
   GPRsNoLRSP.reset(ARM::LR);
   GPRsNoLRSP.reset(ARM::SP);
   GPRsNoLRSP.reset(ARM::PC);
-  for (unsigned Register : GPRsNoLRSP.set_bits()) {
-    if (!UsedRegs.contains(Register)) {
-      // Remember the first pop-friendly register and exit.
-      if (PopFriendly.test(Register)) {
-        PopReg = Register;
-        TemporaryReg = 0;
-        break;
-      }
-      // Otherwise, remember that the register will be available to
-      // save a pop-friendly register.
-      TemporaryReg = Register;
+  findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+
+  // If we couldn't find a pop-friendly register, restore LR before popping the
+  // other callee-saved registers, so we can use one of them as a temporary.
+  bool UseLDRSP = false;
+  if (!PopReg && MBBI != MBB.begin()) {
+    auto PrevMBBI = MBBI;
+    PrevMBBI--;
+    if (PrevMBBI->getOpcode() == ARM::tPOP) {
+      MBBI = PrevMBBI;
+      UsedRegs.stepBackward(*MBBI);
+      findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+      UseLDRSP = true;
     }
   }
 
@@ -619,6 +641,26 @@ bool Thumb1FrameLowering::emitPopSpecial
 
   assert((PopReg || TemporaryReg) && "Cannot get LR");
 
+  if (UseLDRSP) {
+    assert(PopReg && "Do not know how to get LR");
+    // Load the LR via LDR tmp, [SP, #off]
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi))
+      .addReg(PopReg, RegState::Define)
+      .addReg(ARM::SP)
+      .addImm(MBBI->getNumOperands() - 3)
+      .add(predOps(ARMCC::AL));
+    // Move from the temporary register to the LR.
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+      .addReg(ARM::LR, RegState::Define)
+      .addReg(PopReg, RegState::Kill)
+      .add(predOps(ARMCC::AL));
+    // Advance past the pop instruction.
+    MBBI++;
+    // Increment the SP.
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4);
+    return true;
+  }
+
   if (TemporaryReg) {
     assert(!PopReg && "Unnecessary MOV is about to be inserted");
     PopReg = PopFriendly.find_first();
@@ -911,33 +953,29 @@ restoreCalleeSavedRegisters(MachineBasic
 
     if (Reg == ARM::LR) {
       Info.setRestored(false);
-      if (MBB.succ_empty()) {
-        // Special epilogue for vararg functions. See emitEpilogue
-        if (isVarArg)
-          continue;
-        // ARMv4T requires BX, see emitEpilogue
-        if (!STI.hasV5TOps())
-          continue;
-        // Tailcall optimization failed; change TCRETURN to a tBL
-        if (MI->getOpcode() == ARM::TCRETURNdi ||
-            MI->getOpcode() == ARM::TCRETURNri) {
-          unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi
-                            ? ARM::tBL : ARM::tBLXr;
-          MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode));
-          BL.add(predOps(ARMCC::AL));
-          BL.add(MI->getOperand(0));
-          MBB.insert(MI, &*BL);
-        }
-        Reg = ARM::PC;
-        (*MIB).setDesc(TII.get(ARM::tPOP_RET));
-        if (MI != MBB.end())
-          MIB.copyImplicitOps(*MI);
-        MI = MBB.erase(MI);
-      } else
+      if (!MBB.succ_empty() ||
+          MI->getOpcode() == ARM::TCRETURNdi ||
+          MI->getOpcode() == ARM::TCRETURNri)
         // LR may only be popped into PC, as part of return sequence.
         // If this isn't the return sequence, we'll need emitPopSpecialFixUp
         // to restore LR the hard way.
+        // FIXME: if we don't pass any stack arguments it would be actually
+        // advantageous *and* correct to do the conversion to an ordinary call
+        // instruction here.
         continue;
+      // Special epilogue for vararg functions. See emitEpilogue
+      if (isVarArg)
+        continue;
+      // ARMv4T requires BX, see emitEpilogue
+      if (!STI.hasV5TOps())
+        continue;
+
+      // Pop LR into PC.
+      Reg = ARM::PC;
+      (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+      if (MI != MBB.end())
+        MIB.copyImplicitOps(*MI);
+      MI = MBB.erase(MI);
     }
     MIB.addReg(Reg, getDefRegState(true));
     NeedsPop = true;

Modified: llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll?rev=318143&r1=318142&r2=318143&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/thumb1_return_sequence.ll Tue Nov 14 02:36:52 2017
@@ -25,23 +25,14 @@ entry:
 ; --------
 ; Stack realignment means sp is restored from frame pointer
 ; CHECK-V4T:         mov sp
+; CHECK-V4T-NEXT:    ldr [[POP:r[4567]]], [sp, #{{.*}}]
+; CHECK-V4T-NEXT:    mov lr, [[POP]]
 ; CHECK-V4T-NEXT:    pop {[[SAVED]]}
+; CHECK-V4T-NEXT     add sp, sp, #4
 ; The ISA for v4 does not support pop pc, so make sure we do not emit
 ; one even when we do not need to update SP.
 ; CHECK-V4T-NOT:     pop {pc}
-; We may only use lo register to pop, but in that case, all the scratch
-; ones are used.
-; r12 is the only register we are allowed to clobber for AAPCS.
-; Use it to save a lo register.
-; CHECK-V4T-NEXT:    mov [[TEMP_REG:r12]], [[POP_REG:r[0-7]]]
-; Pop the value of LR.
-; CHECK-V4T-NEXT:    pop {[[POP_REG]]}
-; Copy the value of LR in the right register.
-; CHECK-V4T-NEXT:    mov lr, [[POP_REG]]
-; Restore the value that was in the register we used to pop the value of LR.
-; CHECK-V4T-NEXT:    mov [[POP_REG]], [[TEMP_REG]]
-; Return.
-; CHECK-V4T-NEXT:    bx lr
+; CHECK-V4T:         bx lr
 ; CHECK-V5T:         pop {[[SAVED]], pc}
 }
 
@@ -66,22 +57,18 @@ entry:
 
 ; Epilogue
 ; --------
-; CHECK-V4T:         pop {[[SAVED]]}
-; CHECK-V4T-NEXT:    mov r12, [[POP_REG:r[0-7]]]
-; CHECK-V4T-NEXT:    pop {[[POP_REG]]}
-; CHECK-V4T-NEXT:    add sp,
-; CHECK-V4T-NEXT:    mov lr, [[POP_REG]]
-; CHECK-V4T-NEXT:    mov [[POP_REG]], r12
-; CHECK-V4T:         bx  lr
+; CHECK-V4T:         ldr [[POP:r[4567]]], [sp, #{{.*}}]
+; CHECK-V4T-NEXT:    mov lr, [[POP]]
+; CHECK-V4T-NEXT:    pop {[[SAVED]]}
+; CHECK-V4T-NEXT:    add sp, #16
+; CHECK-V4T-NEXT:    bx  lr
 ; CHECK-V5T:         lsls r4
 ; CHECK-V5T-NEXT:    mov sp, r4
-; CHECK-V5T:         pop {[[SAVED]]}
-; CHECK-V5T-NEXT:    mov r12, [[POP_REG:r[0-7]]]
-; CHECK-V5T-NEXT:    pop {[[POP_REG]]}
-; CHECK-V5T-NEXT:    add sp,
-; CHECK-V5T-NEXT:    mov lr, [[POP_REG]]
-; CHECK-V5T-NEXT:    mov [[POP_REG]], r12
-; CHECK-V5T-NEXT:    bx lr
+; CHECK-V5T:         ldr [[POP:r[4567]]], [sp, #{{.*}}]
+; CHECK-V5T-NEXT:    mov lr, [[POP]]
+; CHECK-V5T-NEXT:    pop {[[SAVED]]}
+; CHECK-V5T-NEXT:    add sp, #16
+; CHECK-V5T-NEXT:    bx  lr
 }
 
 ; CHECK-V4T-LABEL: simpleframe

Modified: llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll?rev=318143&r1=318142&r2=318143&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/v8m-tail-call.ll Tue Nov 14 02:36:52 2017
@@ -1,23 +1,47 @@
 ; RUN: llc %s -o - -mtriple=thumbv8m.base | FileCheck %s
 
-define void @test() {
-; CHECK-LABEL: test:
-entry:
-  %call = tail call i32 @foo()
-  %tail = tail call i32 @foo()
-  ret void
-; CHECK: bl foo
-; CHECK: bl foo
-; CHECK-NOT: b foo
+declare i32 @g(...)
+
+declare i32 @h0(i32, i32, i32, i32)
+define hidden i32 @f0() {
+  %1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)()
+  %2 = tail call i32 @h0(i32 %1, i32 1, i32 2, i32 3)
+  ret i32 %2
+; CHECK-LABEL: f0
+; CHECK:      ldr     [[POP:r[4567]]], [sp
+; CHECK-NEXT: mov     lr, [[POP]]
+; CHECK-NEXT: pop     {{.*}}[[POP]]
+; CHECK-NEXT: add     sp, #4
+; CHECK-NEXT: b       h0
 }
 
-define void @test2() {
-; CHECK-LABEL: test2:
-entry:
-  %tail = tail call i32 @foo()
-  ret void
-; CHECK: b foo
-; CHECK-NOT: bl foo
+declare i32 @h1(i32)
+define hidden i32 @f1() {
+  %1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)()
+  %2 = tail call i32 @h1(i32 %1)
+  ret i32 %2
+; CHECK-LABEL: f1
+; CHECK: pop     {r7}
+; CHECK: pop     {r1}
+; CHECK: mov     lr, r1
+; CHECK: b       h1
 }
 
-declare i32 @foo()
+declare i32 @h2(i32, i32, i32, i32, i32)
+define hidden i32 @f2(i32, i32, i32, i32, i32) {
+  %6 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)()
+  %7 = icmp eq i32 %6, 0
+  br i1 %7, label %10, label %8
+
+  %9 = tail call i32 @h2(i32 %6, i32 %1, i32 %2, i32 %3, i32 %4)
+  br label %10
+
+  %11 = phi i32 [ %9, %8 ], [ -1, %5 ]
+  ret i32 %11
+; CHECK-LABEL: f2
+; CHECK:      ldr     [[POP:r[4567]]], [sp
+; CHECK-NEXT: mov     lr, [[POP]]
+; CHECK-NEXT: pop     {{.*}}[[POP]]
+; CHECK-NEXT: add     sp, #4
+; CHECK-NEXT: b       h2
+}

Modified: llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll?rev=318143&r1=318142&r2=318143&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/thumb-shrink-wrapping.ll Tue Nov 14 02:36:52 2017
@@ -647,11 +647,10 @@ define i1 @beq_to_bx(i32* %y, i32 %head)
 ; ENABLE: push {r4, lr}
 
 ; CHECK: tst r3, r4
-; ENABLE-NEXT: pop {r4}
-; ENABLE-NEXT: mov r12, r{{.*}}
-; ENABLE-NEXT: pop {r0}
-; ENABLE-NEXT: mov lr, r0
-; ENABLE-NEXT: mov r0, r12
+; ENABLE-NEXT: ldr [[POP:r[4567]]], [sp, #8]
+; ENABLE-NEXT: mov lr, [[POP]]
+; ENABLE-NEXT: pop {[[POP]]}
+; ENABLE-NEXT: add sp, #4
 ; CHECK-NEXT: beq [[EXIT_LABEL]]
 
 ; CHECK: str r1, [r2]




More information about the llvm-commits mailing list