[llvm] 1354a03 - [PowerPC][Future] Implement PC Relative Tail Calls

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 27 10:58:18 PDT 2020


Author: Stefan Pintilie
Date: 2020-04-27T12:55:08-05:00
New Revision: 1354a03e74c2f59e424dce12c0592c5ceecb0809

URL: https://github.com/llvm/llvm-project/commit/1354a03e74c2f59e424dce12c0592c5ceecb0809
DIFF: https://github.com/llvm/llvm-project/commit/1354a03e74c2f59e424dce12c0592c5ceecb0809.diff

LOG: [PowerPC][Future] Implement PC Relative Tail Calls

Tail Calls were initially disabled for PC Relative code because it was not safe
to make certain assumptions about the tail calls (namely that all compiled
functions no longer used the TOC pointer in R2). However, once all of the
TOC pointer references have been removed it is safe to tail call everything
that was tail called prior to the PC relative additions as well as a number of
new cases.
For example, it is now possible to tail call indirect functions as there is no
need to save and restore the TOC pointer for indirect functions if the caller
is marked as may clobber R2 (st_other=1). For the same reason it is now also
possible to tail call functions that are external.

Differential Revision: https://reviews.llvm.org/D77788

Added: 
    llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
    llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
    llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
    llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d4fd0fb77a05..9f79182f518c 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1537,13 +1537,14 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
     // 3) A function does not use the TOC pointer R2 but does have calls.
     //    In this case st_other=1 since we do not know whether or not any
     //    of the callees clobber R2. This case is dealt with in this else if
-    //    block.
+    //    block. Tail calls are considered calls and the st_other should also
+    //    be set to 1 in that case as well.
     // 4) The function does not use the TOC pointer but R2 is used inside
     //    the function. In this case st_other=1 once again.
     // 5) This function uses inline asm. We mark R2 as reserved if the function
-    //    has inline asm so we have to assume that it may be used.
-    if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() ||
-        (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
+    //    has inline asm as we have to assume that it may be used.
+    if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() ||
+        MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
       PPCTargetStreamer *TS =
           static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
       if (TS)

diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 93695a854c24..ad1f16b4820a 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1674,13 +1674,25 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
   DebugLoc dl = MBBI->getDebugLoc();
   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
 
-  // Create branch instruction for pseudo tail call return instruction
+  // Create branch instruction for pseudo tail call return instruction.
+  // The TCRETURNdi variants are direct calls. Valid targets for those are
+  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
+  // since we can tail call external functions with PC-Rel (i.e. we don't need
+  // to worry about different TOC pointers). Some of the external functions will
+  // be MO_GlobalAddress while others like memcpy for example, are going to
+  // be MO_ExternalSymbol.
   unsigned RetOpcode = MBBI->getOpcode();
   if (RetOpcode == PPC::TCRETURNdi) {
     MBBI = MBB.getLastNonDebugInstr();
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
-      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    if (JumpTarget.isGlobal())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    else if (JumpTarget.isSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+        addExternalSymbol(JumpTarget.getSymbolName());
+    else
+      llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri) {
     MBBI = MBB.getLastNonDebugInstr();
     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@@ -1692,8 +1704,14 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
   } else if (RetOpcode == PPC::TCRETURNdi8) {
     MBBI = MBB.getLastNonDebugInstr();
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
-      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    if (JumpTarget.isGlobal())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    else if (JumpTarget.isSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addExternalSymbol(JumpTarget.getSymbolName());
+    else
+      llvm_unreachable("Expecting Global or External Symbol");
   } else if (RetOpcode == PPC::TCRETURNri8) {
     MBBI = MBB.getLastNonDebugInstr();
     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 7df040776cfc..28c7319e7d6f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4780,16 +4780,6 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
 
-  // FIXME: Tail calls are currently disabled when using PC Relative addressing.
-  // The issue is that PC Relative is only partially implemented and so there
-  // is currently a mix of functions that require the TOC and functions that do
-  // not require it. If we have A calls B calls C and both A and B require the
-  // TOC and C does not and is marked as clobbering R2 then it is not safe for
-  // B to tail call C. Since we do not have the information of whether or not
-  // a funciton needs to use the TOC here in this function we need to be
-  // conservatively safe and disable all tail calls for now.
-  if (Subtarget.isUsingPCRelativeCalls()) return false;
-
   if (DisableSCO && !TailCallOpt) return false;
 
   // Variadic argument functions are not supported.
@@ -4829,15 +4819,22 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
       needStackSlotPassParameters(Subtarget, Outs))
     return false;
 
-  // No TCO/SCO on indirect call because Caller have to restore its TOC
-  if (!isFunctionGlobalAddress(Callee) &&
-      !isa<ExternalSymbolSDNode>(Callee))
+  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
+  // the caller and callee share the same TOC for TCO/SCO. If the caller and
+  // callee potentially have different TOC bases then we cannot tail call since
+  // we need to restore the TOC pointer after the call.
+  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
+  // We cannot guarantee this for indirect calls or calls to external functions.
+  // When PC-Relative addressing is used, the concept of the TOC is no longer
+  // applicable so this check is not required.
+  // Check first for indirect calls.
+  if (!Subtarget.isUsingPCRelativeCalls() &&
+      !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
     return false;
 
-  // If the caller and callee potentially have different TOC bases then we
-  // cannot tail call since we need to restore the TOC pointer after the call.
-  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
-  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
+  // Check if we share the TOC base.
+  if (!Subtarget.isUsingPCRelativeCalls() &&
+      !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
     return false;
 
   // TCO allows altering callee ABI, so we don't have to check further.
@@ -4849,11 +4846,14 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
   // If callee use the same argument list that caller is using, then we can
   // apply SCO on this case. If it is not, then we need to check if callee needs
   // stack for passing arguments.
-  assert(CB && "Expected to have a CallBase!");
-  if (!hasSameArgumentList(&Caller, *CB) &&
-      needStackSlotPassParameters(Subtarget, Outs)) {
+  // PC Relative tail calls may not have a CallBase.
+  // If there is no CallBase we cannot verify if we have the same argument
+  // list so assume that we don't have the same argument list.
+  if (CB && !hasSameArgumentList(&Caller, *CB) &&
+      needStackSlotPassParameters(Subtarget, Outs))
+    return false;
+  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
     return false;
-  }
 
   return true;
 }
@@ -5534,13 +5534,18 @@ SDValue PPCTargetLowering::FinishCall(
 
   // Emit tail call.
   if (CFlags.IsTailCall) {
+    // Indirect tail call when using PC Relative calls do not have the same
+    // constraints.
     assert(((Callee.getOpcode() == ISD::Register &&
              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
             Callee.getOpcode() == ISD::TargetExternalSymbol ||
             Callee.getOpcode() == ISD::TargetGlobalAddress ||
-            isa<ConstantSDNode>(Callee)) &&
-           "Expecting a global address, external symbol, absolute value or "
-           "register");
+            isa<ConstantSDNode>(Callee) ||
+            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
+           "Expecting a global address, external symbol, absolute value, "
+           "register or an indirect tail call when PC Relative calls are "
+           "used.");
+    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
     assert(CallOpc == PPCISD::TC_RETURN &&
            "Unexpected call opcode for a tail call.");
     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
@@ -5598,17 +5603,19 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
         ++NumSiblingCalls;
 
-      assert(isa<GlobalAddressSDNode>(Callee) &&
+      // PC Relative calls no longer guarantee that the callee is a Global
+      // Address Node. The callee could be an indirect tail call in which
+      // case the SDValue for the callee could be a load (to load the address
+      // of a function pointer) or it may be a register copy (to move the
+      // address of the callee from a function parameter into a virtual
+      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
+      assert((Subtarget.isUsingPCRelativeCalls() ||
+              isa<GlobalAddressSDNode>(Callee)) &&
              "Callee should be an llvm::Function object.");
-      LLVM_DEBUG(
-          const GlobalValue *GV =
-              cast<GlobalAddressSDNode>(Callee)->getGlobal();
-          const unsigned Width =
-              80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
-          dbgs() << "TCO caller: "
-                 << left_justify(DAG.getMachineFunction().getName(), Width)
-                 << ", callee linkage: " << GV->getVisibility() << ", "
-                 << GV->getLinkage() << "\n");
+
+      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
+                        << "\nTCO callee: ");
+      LLVM_DEBUG(Callee.dump());
     }
   }
 

diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index add4de24275f..236f98f32e18 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -86,14 +86,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
     RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
 
   const MachineInstr *MI = MO.getParent();
-
-  if (MI->getOpcode() == PPC::BL8_NOTOC)
-    RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
-
   const MachineFunction *MF = MI->getMF();
   const Module *M = MF->getFunction().getParent();
   const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
   const TargetMachine &TM = Printer.TM;
+
+  unsigned MIOpcode = MI->getOpcode();
+  assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
+         "BL8_NOTOC is only valid when using PC Relative Calls.");
+  if (Subtarget->isUsingPCRelativeCalls()) {
+    if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 ||
+        MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 ||
+        MIOpcode == PPC::BL8_NOTOC) {
+      RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
+    }
+  }
+
   const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
   // If -msecure-plt -fPIC, add 32768 to symbol.
   if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&

diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
index c145b5c4378d..957a2d5e48f2 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
@@ -7,13 +7,11 @@
 
 
 ; CHECK-S-LABEL: caller
-; CHECK-S: bl callee@notoc
-; CHECK-S: blr
+; CHECK-S: b callee@notoc
 
 ; CHECK-O-LABEL: caller
-; CHECK-O: bl
+; CHECK-O: b
 ; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee
-; CHECK-O: blr
 define dso_local signext i32 @caller() local_unnamed_addr {
 entry:
   %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)()
@@ -25,13 +23,11 @@ declare signext i32 @callee(...) local_unnamed_addr
 
 ; Some calls can be considered Extrnal Symbols.
 ; CHECK-S-LABEL: ExternalSymbol
-; CHECK-S: bl memcpy@notoc
-; CHECK-S: blr
+; CHECK-S: b memcpy@notoc
 
 ; CHECK-O-LABEL: ExternalSymbol
-; CHECK-O: bl
+; CHECK-O: b
 ; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy
-; CHECK-O: blr
 define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr {
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false)

diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index 010704f546d0..bb6b5052ee58 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -193,19 +193,10 @@ define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr
 ; CHECK-ALL-LABEL: TailCallLocal1:
 ; CHECK-S:         .localentry     TailCallLocal1
 ; CHECK-S:       # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    plwz r4, globalVar@PCREL(0), 1
+; CHECK-S:         plwz r4, globalVar@PCREL(0), 1
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
-; CHECK-S-NEXT:    bl localCall@notoc
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S-NEXT:    b localCall@notoc
 entry:
   %0 = load i32, i32* @globalVar, align 4
   %add = add nsw i32 %0, %a
@@ -217,20 +208,11 @@ define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr
 ; CHECK-ALL-LABEL: TailCallLocal2:
 ; CHECK-S:         .localentry     TailCallLocal2
 ; CHECK-S:       # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S:         pld r4, externGlobalVar@got@pcrel(0), 1
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
-; CHECK-S-NEXT:    bl localCall@notoc
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S-NEXT:    b localCall@notoc
 entry:
   %0 = load i32, i32* @externGlobalVar, align 4
   %add = add nsw i32 %0, %a
@@ -243,16 +225,7 @@ define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unname
 ; CHECK-S:         .localentry TailCallLocalNoGlobal, 1
 ; CHECK-P9:        .localentry TailCallLocalNoGlobal, .Lfunc_lep9-.Lfunc_gep9
 ; CHECK-ALL:       # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    bl localCall@notoc
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S:         b localCall@notoc
 entry:
   %call = tail call signext i32 @localCall(i32 signext %a)
   ret i32 %call
@@ -262,19 +235,10 @@ define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr
 ; CHECK-ALL-LABEL: TailCallExtern1:
 ; CHECK-S:         .localentry     TailCallExtern1
 ; CHECK-S:       # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    plwz r4, globalVar@PCREL(0), 1
+; CHECK-S:         plwz r4, globalVar@PCREL(0), 1
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
-; CHECK-S-NEXT:    bl externCall@notoc
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S-NEXT:    b externCall@notoc
 entry:
   %0 = load i32, i32* @globalVar, align 4
   %add = add nsw i32 %0, %a
@@ -286,20 +250,11 @@ define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr
 ; CHECK-ALL-LABEL: TailCallExtern2:
 ; CHECK-S:         .localentry     TailCallExtern2
 ; CHECK-S:       # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S:         pld r4, externGlobalVar@got@pcrel(0), 1
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
-; CHECK-S-NEXT:    bl externCall@notoc
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S-NEXT:    b externCall@notoc
 entry:
   %0 = load i32, i32* @externGlobalVar, align 4
   %add = add nsw i32 %0, %a
@@ -311,16 +266,8 @@ define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnam
 ; CHECK-ALL-LABEL: TailCallExternNoGlobal:
 ; CHECK-S:         .localentry TailCallExternNoGlobal, 1
 ; CHECK-S-NEXT:  # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
-; CHECK-S-NEXT:    bl externCall@notoc
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S-NEXT:    b externCall@notoc
+; CHECK-S-NEXT:    #TC_RETURNd8 externCall@notoc
 entry:
   %call = tail call signext i32 @externCall(i32 signext %a)
   ret i32 %call
@@ -443,18 +390,10 @@ entry:
 define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr {
 ; CHECK-ALL-LABEL: IndirectCallOnly:
 ; CHECK-S:       # %bb.0: # %entry
-; CHECK-S-NEXT:    mflr r0
-; CHECK-S-NEXT:    std r0, 16(r1)
-; CHECK-S-NEXT:    stdu r1, -32(r1)
-; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-S-NEXT:    .cfi_offset lr, 16
 ; CHECK-S-NEXT:    mtctr r4
 ; CHECK-S-NEXT:    mr r12, r4
-; CHECK-S-NEXT:    bctrl
-; CHECK-S-NEXT:    addi r1, r1, 32
-; CHECK-S-NEXT:    ld r0, 16(r1)
-; CHECK-S-NEXT:    mtlr r0
-; CHECK-S-NEXT:    blr
+; CHECK-S-NEXT:    bctr
+; CHECK-S-NEXT:    #TC_RETURNr8 ctr
 entry:
   %call = tail call signext i32 %call_param(i32 signext %a)
   ret i32 %call

diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
index a3404a8951a1..7f7659b356ee 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
@@ -215,20 +215,13 @@ entry:
 
 define dso_local void @ReadFuncPtr() local_unnamed_addr  {
 ; CHECK-LABEL: ReadFuncPtr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK:         .localentry ReadFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, ptrfunc@got@pcrel(0), 1
 ; CHECK-NEXT:    ld r12, 0(r3)
 ; CHECK-NEXT:    mtctr r12
-; CHECK-NEXT:    bctrl
-; CHECK-NEXT:    addi r1, r1, 32
-; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    mtlr r0
-; CHECK-NEXT:    blr
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
 entry:
   %0 = load void ()*, void ()** bitcast (void (...)** @ptrfunc to void ()**), align 8
   tail call void %0()

diff --git a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll
index 7806d691c530..d7df6f10a6be 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll
@@ -9,20 +9,10 @@
 define dso_local void @IndirectCallExternFuncPtr(void ()* nocapture %ptrfunc) {
 ; CHECK-LABEL: IndirectCallExternFuncPtr:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -32(r1)
-
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    mtctr r3
 ; CHECK-NEXT:    mr r12, r3
-; CHECK-NEXT:    bctrl
-
-; CHECK-NEXT:    addi r1, r1, 32
-; CHECK-NEXT:    ld r0, 16(r1)
-; CHECK-NEXT:    mtlr r0
-; CHECK-NEXT:    blr
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr
 entry:
   tail call void %ptrfunc()
   ret void

diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
new file mode 100644
index 000000000000..78b01601f1fa
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; The tests check the behaviour of PC Relative tail calls. When using
+; PC Relative we are able to do more tail calling than we have done in
+; the past as we no longer need to restore the TOC pointer into R2 after
+; most calls.
+
+@Func = external local_unnamed_addr global i32 (...)*, align 8
+@FuncLocal = common dso_local local_unnamed_addr global i32 (...)* null, align 8
+
+; No calls in this function but we assign the function pointers.
+define dso_local void @AssignFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: AssignFuncPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:    pld r4, Function@got@pcrel(0), 1
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    pstd r4, FuncLocal@PCREL(0), 1
+; CHECK-NEXT:    blr
+entry:
+  store i32 (...)* @Function, i32 (...)** @Func, align 8
+  store i32 (...)* @Function, i32 (...)** @FuncLocal, align 8
+  ret void
+}
+
+declare signext i32 @Function(...)
+
+define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: TailCallLocalFuncPtr:
+; CHECK:         .localentry TailCallLocalFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    pld r12, FuncLocal@PCREL(0), 1
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @FuncLocal to i32 ()**), align 8
+  %call = tail call signext i32 %0()
+  ret void
+}
+
+define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
+; CHECK-LABEL: TailCallExtrnFuncPtr:
+; CHECK:         .localentry TailCallExtrnFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:    ld r12, 0(r3)
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @Func to i32 ()**), align 8
+  %call = tail call signext i32 %0()
+  ret void
+}
+
+define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr {
+; CHECK-LABEL: TailCallParamFuncPtr:
+; CHECK:         .localentry TailCallParamFuncPtr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    mr r12, r3
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()*
+  %call = tail call signext i32 %callee.knr.cast()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailIndirectCall:
+; CHECK:         .localentry NoTailIndirectCall, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    mr r12, r3
+; CHECK-NEXT:    mr r30, r4
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()*
+  %call = tail call signext i32 %callee.knr.cast()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallDirect() local_unnamed_addr {
+; CHECK-LABEL: TailCallDirect:
+; CHECK:         .localentry TailCallDirect, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    b Function@notoc
+; CHECK-NEXT:    #TC_RETURNd8 Function@notoc 0
+entry:
+  %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailCallDirect:
+; CHECK:         .localentry NoTailCallDirect, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl Function@notoc
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr {
+; CHECK-LABEL: TailCallDirectLocal:
+; CHECK:         .localentry TailCallDirectLocal, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    b LocalFunction@notoc
+; CHECK-NEXT:    #TC_RETURNd8 LocalFunction@notoc 0
+entry:
+  %call = tail call fastcc signext i32 @LocalFunction()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailCallDirectLocal:
+; CHECK:         .localentry NoTailCallDirectLocal, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    bl LocalFunction@notoc
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call fastcc signext i32 @LocalFunction()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallAbs() local_unnamed_addr {
+; CHECK-LABEL: TailCallAbs:
+; CHECK:         .localentry TailCallAbs, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    li r12, 400
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)()
+  ret i32 %call
+}
+
+define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr {
+; CHECK-LABEL: NoTailCallAbs:
+; CHECK:         .localentry NoTailCallAbs, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    mtctr r3
+; CHECK-NEXT:    li r12, 400
+; CHECK-NEXT:    bctrl
+; CHECK-NEXT:    add r3, r3, r30
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)()
+  %add = add nsw i32 %call, %a
+  ret i32 %add
+}
+
+; Function Attrs: noinline
+; This function should be tail called and not inlined.
+define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 {
+; CHECK-LABEL: LocalFunction:
+; CHECK:         .localentry LocalFunction, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    li r3, 42
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 asm "li $0, 42", "=&r"()
+  ret i32 %0
+}
+
+attributes #0 = { noinline }
+


        


More information about the llvm-commits mailing list