[llvm] 6c4b40d - [PowerPC][Future] Add Support For Functions That Do Not Use A TOC.

Kamau Bridgeman via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 8 06:08:23 PDT 2020


Author: Stefan Pintilie
Date: 2020-04-08T08:07:35-05:00
New Revision: 6c4b40def77622a5cf62a219ef4af63dc876e144

URL: https://github.com/llvm/llvm-project/commit/6c4b40def77622a5cf62a219ef4af63dc876e144
DIFF: https://github.com/llvm/llvm-project/commit/6c4b40def77622a5cf62a219ef4af63dc876e144.diff

LOG: [PowerPC][Future] Add Support For Functions That Do Not Use A TOC.

On PowerPC most functions require a valid TOC pointer.

This is the case because either the function itself needs to use this
pointer to access the TOC or because other functions that are called
from that function expect a valid TOC pointer in the register R2.
The main exception to this is leaf functions that do not access the TOC
since they are guaranteed not to need a valid TOC pointer.

This patch introduces a feature that will allow more functions to not
require a valid TOC pointer in R2.

Differential Revision: https://reviews.llvm.org/D73664

Added: 
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll

Modified: 
    llvm/include/llvm/BinaryFormat/ELF.h
    llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
    llvm/include/llvm/MC/MCExpr.h
    llvm/lib/MC/MCExpr.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
    llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
    llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/lib/Target/PowerPC/PPCScheduleP9.td
    llvm/lib/Target/PowerPC/PPCSubtarget.cpp
    llvm/lib/Target/PowerPC/PPCSubtarget.h
    llvm/test/MC/PowerPC/ppc64-localentry-error1.s
    llvm/test/MC/PowerPC/ppc64-localentry-error2.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 9bf5a3c99a55..1b0412bc47be 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -393,12 +393,6 @@ static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) {
   unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
   return ((1 << Val) >> 2) << 2;
 }
-static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) {
-  unsigned Val =
-      (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4)
-                       : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 2 : 0)));
-  return Val << STO_PPC64_LOCAL_BIT;
-}
 
 // ELF Relocation types for PPC64
 enum {

diff  --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
index 719d0c9c36ac..f8c330e2cf3f 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def
@@ -96,6 +96,7 @@
 #undef R_PPC64_TPREL16_HIGHA
 #undef R_PPC64_DTPREL16_HIGH
 #undef R_PPC64_DTPREL16_HIGHA
+#undef R_PPC64_REL24_NOTOC
 #undef R_PPC64_IRELATIVE
 #undef R_PPC64_REL16
 #undef R_PPC64_REL16_LO
@@ -190,6 +191,7 @@ ELF_RELOC(R_PPC64_TPREL16_HIGH,         112)
 ELF_RELOC(R_PPC64_TPREL16_HIGHA,        113)
 ELF_RELOC(R_PPC64_DTPREL16_HIGH,        114)
 ELF_RELOC(R_PPC64_DTPREL16_HIGHA,       115)
+ELF_RELOC(R_PPC64_REL24_NOTOC,          116)
 ELF_RELOC(R_PPC64_IRELATIVE,            248)
 ELF_RELOC(R_PPC64_REL16,                249)
 ELF_RELOC(R_PPC64_REL16_LO,             250)

diff  --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index c0060c8610e7..386fa9c533c1 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -284,6 +284,7 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_GOT_TLSLD_HA,   // symbol@got@tlsld@ha
     VK_PPC_TLSLD,          // symbol@tlsld
     VK_PPC_LOCAL,          // symbol@local
+    VK_PPC_NOTOC,          // symbol@notoc
 
     VK_COFF_IMGREL32, // symbol at imgrel (image-relative)
 

diff  --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 2ddbf944f8ed..091880250ec5 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -319,6 +319,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha";
   case VK_PPC_TLSLD: return "tlsld";
   case VK_PPC_LOCAL: return "local";
+  case VK_PPC_NOTOC: return "notoc";
   case VK_COFF_IMGREL32: return "IMGREL";
   case VK_Hexagon_LO16: return "LO16";
   case VK_Hexagon_HI16: return "HI16";
@@ -432,6 +433,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
     .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
     .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
+    .Case("notoc", VK_PPC_NOTOC)
     .Case("gdgot", VK_Hexagon_GD_GOT)
     .Case("gdplt", VK_Hexagon_GD_PLT)
     .Case("iegot", VK_Hexagon_IE_GOT)

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 7320c1e6f51d..cb7d4293e0f1 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -39,6 +39,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
     return Value & 0xfffc;
   case PPC::fixup_ppc_br24:
   case PPC::fixup_ppc_br24abs:
+  case PPC::fixup_ppc_br24_notoc:
     return Value & 0x3fffffc;
   case PPC::fixup_ppc_half16:
     return Value & 0xffff;
@@ -62,6 +63,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
   case PPC::fixup_ppc_brcond14abs:
   case PPC::fixup_ppc_br24:
   case PPC::fixup_ppc_br24abs:
+  case PPC::fixup_ppc_br24_notoc:
     return 4;
   case FK_Data_8:
     return 8;
@@ -88,6 +90,7 @@ class PPCAsmBackend : public MCAsmBackend {
     const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
       // name                    offset  bits  flags
       { "fixup_ppc_br24",        6,      24,   MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_ppc_br24_notoc",  6,      24,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_brcond14",    16,     14,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_br24abs",     6,      24,   0 },
       { "fixup_ppc_brcond14abs", 16,     14,   0 },
@@ -98,6 +101,7 @@ class PPCAsmBackend : public MCAsmBackend {
     const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
       // name                    offset  bits  flags
       { "fixup_ppc_br24",        2,      24,   MCFixupKindInfo::FKF_IsPCRel },
+      { "fixup_ppc_br24_notoc",  2,      24,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_brcond14",    2,      14,   MCFixupKindInfo::FKF_IsPCRel },
       { "fixup_ppc_br24abs",     2,      24,   0 },
       { "fixup_ppc_brcond14abs", 2,      14,   0 },
@@ -151,6 +155,7 @@ class PPCAsmBackend : public MCAsmBackend {
       return Kind >= FirstLiteralRelocationKind;
     case PPC::fixup_ppc_br24:
     case PPC::fixup_ppc_br24abs:
+    case PPC::fixup_ppc_br24_notoc:
       // If the target symbol has a local entry point we must not attempt
       // to resolve the fixup directly.  Emit a relocation and leave
       // resolution of the final target address to the linker.

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index c0379620a197..99a8207832b0 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -86,6 +86,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
       llvm_unreachable("Unimplemented");
     case PPC::fixup_ppc_br24:
     case PPC::fixup_ppc_br24abs:
+    case PPC::fixup_ppc_br24_notoc:
       switch (Modifier) {
       default: llvm_unreachable("Unsupported Modifier");
       case MCSymbolRefExpr::VK_None:
@@ -97,6 +98,9 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
       case MCSymbolRefExpr::VK_PPC_LOCAL:
         Type = ELF::R_PPC_LOCAL24PC;
         break;
+      case MCSymbolRefExpr::VK_PPC_NOTOC:
+        Type = ELF::R_PPC64_REL24_NOTOC;
+        break;
       }
       break;
     case PPC::fixup_ppc_brcond14:
@@ -431,6 +435,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
       return false;
 
     case ELF::R_PPC_REL24:
+    case ELF::R_PPC64_REL24_NOTOC:
       // If the target symbol has a local entry point, we must keep the
       // target symbol to preserve that information for the linker.
       // The "other" values are stored in the last 6 bits of the second byte.

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 845489788c86..efa7f0ad86d9 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -19,6 +19,10 @@ enum Fixups {
   // 24-bit PC relative relocation for direct branches like 'b' and 'bl'.
   fixup_ppc_br24 = FirstTargetFixupKind,
 
+  // 24-bit PC relative relocation for direct branches like 'b' and 'bl' where
+  // the caller does not use the TOC.
+  fixup_ppc_br24_notoc,
+
   /// 14-bit PC relative relocation for conditional branches.
   fixup_ppc_brcond14,
 

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 672f6e02ce93..06df3bde48f6 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -48,7 +48,9 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
 
   // Add a fixup for the branch target.
   Fixups.push_back(MCFixup::create(0, MO.getExpr(),
-                                   (MCFixupKind)PPC::fixup_ppc_br24));
+                                   ((MI.getOpcode() == PPC::BL8_NOTOC)
+                                        ? (MCFixupKind)PPC::fixup_ppc_br24_notoc
+                                        : (MCFixupKind)PPC::fixup_ppc_br24)));
   return 0;
 }
 

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 9d3ecc07b3b2..c85b08ad77e0 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -179,13 +179,9 @@ class PPCTargetELFStreamer : public PPCTargetStreamer {
   void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
     MCAssembler &MCA = getStreamer().getAssembler();
 
-    int64_t Res;
-    if (!LocalOffset->evaluateAsAbsolute(Res, MCA))
-      report_fatal_error(".localentry expression must be absolute.");
-
-    unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
-    if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
-      report_fatal_error(".localentry expression cannot be encoded.");
+    // encodePPC64LocalEntryOffset will report an error if it cannot
+    // encode LocalOffset.
+    unsigned Encoded = encodePPC64LocalEntryOffset(LocalOffset);
 
     unsigned Other = S->getOther();
     Other &= ~ELF::STO_PPC64_LOCAL_MASK;
@@ -230,6 +226,31 @@ class PPCTargetELFStreamer : public PPCTargetStreamer {
     D->setOther(Other);
     return true;
   }
+
+  unsigned encodePPC64LocalEntryOffset(const MCExpr *LocalOffset) {
+    MCAssembler &MCA = getStreamer().getAssembler();
+    int64_t Offset;
+    if (!LocalOffset->evaluateAsAbsolute(Offset, MCA))
+      MCA.getContext().reportFatalError(
+          LocalOffset->getLoc(), ".localentry expression must be absolute.");
+
+    switch (Offset) {
+    default:
+      MCA.getContext().reportFatalError(
+          LocalOffset->getLoc(),
+          ".localentry expression is not a valid power of 2.");
+    case 0:
+      return 0;
+    case 1:
+      return 1 << ELF::STO_PPC64_LOCAL_BIT;
+    case 4:
+    case 8:
+    case 16:
+    case 32:
+    case 64:
+      return (int)Log2(Offset) << (int)ELF::STO_PPC64_LOCAL_BIT;
+    }
+  }
 };
 
 class PPCTargetMachOStreamer : public PPCTargetStreamer {

diff  --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 0a256cb5876b..e0bbb8f8dd3e 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1460,14 +1460,16 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
   //
   // This ensures we have r2 set up correctly while executing the function
   // body, no matter which entry point is called.
-  if (Subtarget->isELFv2ABI()
-      // Only do all that if the function uses r2 in the first place.
-      && !MF->getRegInfo().use_empty(PPC::X2)) {
+  const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+  const bool UsesX2OrR2 = !MF->getRegInfo().use_empty(PPC::X2) ||
+                          !MF->getRegInfo().use_empty(PPC::R2);
+  // Only do all that if the function uses R2 as the TOC pointer
+  // in the first place. We don't need the global entry point if the
+  // function uses R2 as an allocatable register.
+  if (Subtarget->isELFv2ABI() && UsesX2OrR2 && PPCFI->usesTOCBasePtr()) {
     // Note: The logic here must be synchronized with the code in the
     // branch-selection pass which sets the offset of the first block in the
     // function. This matters because it affects the alignment.
-    const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
-
     MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
     OutStreamer->emitLabel(GlobalEntryLabel);
     const MCSymbolRefExpr *GlobalEntryLabelExp =
@@ -1519,6 +1521,35 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
 
     if (TS)
       TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp);
+  } else if (Subtarget->isELFv2ABI()) {
+    // When generating the entry point for a function we have a few scenarios
+    // based on whether or not that function uses R2 and whether or not that
+    // function makes calls (or is a leaf function).
+    // 1) A leaf function that does not use R2 (or treats it as callee-saved
+    //    and preserves it). In this case st_other=0 and both
+    //    the local and global entry points for the function are the same.
+    //    No special entry point code is required.
+    // 2) A function uses the TOC pointer R2. This function may or may not have
+    //    calls. In this case st_other=[2,6] and the global and local entry
+    //    points are different. Code to correctly set up the TOC pointer in R2
+    //    is put between the global and local entry points. This case is
+    //    covered by the if statement above.
+    // 3) A function does not use the TOC pointer R2 but does have calls.
+    //    In this case st_other=1 since we do not know whether or not any
+    //    of the callees clobber R2. This case is dealt with in this else if
+    //    block.
+    // 4) The function does not use the TOC pointer but R2 is used inside
+    //    the function. In this case st_other=1 once again.
+    // 5) This function uses inline asm. We mark R2 as reserved if the function
+    //    has inline asm so we have to assume that it may be used.
+    if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() ||
+        (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
+      PPCTargetStreamer *TS =
+          static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
+      if (TS)
+        TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym),
+                           MCConstantExpr::create(1, OutContext));
+    }
   }
 }
 

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3d8441907c04..db81a6c2cb70 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1404,6 +1404,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
   case PPCISD::CALL:            return "PPCISD::CALL";
   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
+  case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
@@ -4689,6 +4690,16 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                     SelectionDAG& DAG) const {
   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
 
+  // FIXME: Tail calls are currently disabled when using PC Relative addressing.
+  // The issue is that PC Relative is only partially implemented and so there
+  // is currently a mix of functions that require the TOC and functions that do
+  // not require it. If we have A calls B calls C and both A and B require the
+  // TOC and C does not and is marked as clobbering R2 then it is not safe for
+  // B to tail call C. Since we do not have the information of whether or not
+  // a function needs to use the TOC here in this function we need to be
+  // conservatively safe and disable all tail calls for now.
+  if (Subtarget.isUsingPCRelativeCalls()) return false;
+
   if (DisableSCO && !TailCallOpt) return false;
 
   // Variadic argument functions are not supported.
@@ -5085,6 +5096,17 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
     return PPCISD::BCTRL;
   }
 
+  // FIXME: At this moment indirect calls are treated ahead of the
+  // PC Relative condition because binaries can still contain a possible
+  // mix of functions that use a TOC and functions that do not use a TOC.
+  // Once the PC Relative feature is complete this condition should be moved
+  // up ahead of the indirect calls and should return a PPCISD::BCTRL for
+  // that case.
+  if (Subtarget.isUsingPCRelativeCalls()) {
+    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
+    return PPCISD::CALL_NOTOC;
+  }
+
   // The ABIs that maintain a TOC pointer accross calls need to have a nop
   // immediately following the call instruction if the caller and callee may
   // have different TOC bases. At link time if the linker determines the calls
@@ -5094,8 +5116,8 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
   // will rewrite the nop to be a load of the TOC pointer from the linkage area
   // into gpr2.
   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
-    return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
-                                                  : PPCISD::CALL_NOP;
+      return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+                                                    : PPCISD::CALL_NOP;
 
   return PPCISD::CALL;
 }
@@ -5372,7 +5394,7 @@ buildCallOperands(SmallVectorImpl<SDValue> &Ops,
   // no way to mark dependencies as implicit here.
   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
-      !CFlags.IsPatchPoint)
+       !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
 
   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
@@ -5398,7 +5420,8 @@ SDValue PPCTargetLowering::FinishCall(
     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
     SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
 
-  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
+      Subtarget.isAIXABI())
     setUsesTOCBasePtr(DAG);
 
   unsigned CallOpc =
@@ -11373,7 +11396,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
     if (Subtarget.is64BitELFABI() &&
-        MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+        MI.getOpcode() == TargetOpcode::PATCHPOINT &&
+        !Subtarget.isUsingPCRelativeCalls()) {
       // Call lowering should have added an r2 operand to indicate a dependence
       // on the TOC base pointer value. It can't however, because there is no
       // way to mark the dependence as implicit there, and so the stackmap code

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4107d2bc188e..3a1001c0e135 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -165,9 +165,11 @@ namespace llvm {
 
     /// CALL - A direct function call.
     /// CALL_NOP is a call with the special NOP which follows 64-bit
     /// SVR4 calls and 32-bit/64-bit AIX calls.
+    /// CALL_NOTOC is a call where the caller does not use the TOC.
     CALL,
     CALL_NOP,
+    CALL_NOTOC,
 
     /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
     /// MTCTR instruction.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 542225ca0d10..073436191dd9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -140,6 +140,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
                              (outs), (ins abscalltarget:$func),
                              "bla $func\n\tnop", IIC_BrB,
                              [(PPCcall_nop (i64 imm:$func))]>;
+    let Predicates = [PCRelativeMemops] in {
+      // BL8_NOTOC means that the caller does not use the TOC pointer and if
+      // it does use R2 then it is just a caller saved register. Therefore it is
+      // safe to emit only the bl and not the nop for this instruction. The
+      // linker will not try to restore R2 after the call.
+      def BL8_NOTOC : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs),
+                                             (ins calltarget:$func),
+                                             "bl $func", IIC_BrB, []>;
+    }
   }
   let Uses = [CTR8, RM] in {
     let isPredicable = 1 in
@@ -194,6 +203,11 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
           (BL8_NOP texternalsym:$dst)>;
 
+def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
+          (BL8_NOTOC tglobaladdr:$dst)>;
+def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
+          (BL8_NOTOC texternalsym:$dst)>;
+
 // Calls for AIX
 def : Pat<(PPCcall (i64 mcsym:$dst)),
           (BL8 mcsym:$dst)>;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 189dd77ab7ed..7a7128d99016 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -252,6 +252,9 @@ def PPCcall  : SDNode<"PPCISD::CALL", SDT_PPCCall,
 def PPCcall_nop  : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPVariadic]>;
+def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
 def PPCmtctr      : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@@ -994,6 +997,7 @@ def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
 def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
 def IsISA3_0 : Predicate<"PPCSubTarget->isISA3_0()">;
 def HasFPU : Predicate<"PPCSubTarget->hasFPU()">;
+def PCRelativeMemops : Predicate<"PPCSubTarget->hasPCRelativeMemops()">;
 
 //===----------------------------------------------------------------------===//
 // PowerPC Multiclass Definitions.

diff  --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index e9856d4ceb64..253f71ad33ab 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -81,7 +81,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
  if (MO.getTargetFlags() == PPCII::MO_PLT)
     RefKind = MCSymbolRefExpr::VK_PLT;
 
-  const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+  const MachineInstr *MI = MO.getParent();
+
+  if (MI->getOpcode() == PPC::BL8_NOTOC)
+    RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
+
+  const MachineFunction *MF = MI->getMF();
   const Module *M = MF->getFunction().getParent();
   const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
   const TargetMachine &TM = Printer.TM;

diff  --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 70d18edafe19..ff2329ada82f 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -57,6 +57,8 @@ STATISTIC(NumRotatesCollapsed,
           "Number of pairs of rotate left, clear left/right collapsed");
 STATISTIC(NumEXTSWAndSLDICombined,
           "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
+STATISTIC(NumX2FoundForPCRel, "Number of times the X2 TOC pointer has been "
+                              "found when PC relative NOTOC is being used.");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -99,6 +101,11 @@ struct PPCMIPeephole : public MachineFunctionPass {
   // Initialize class variables.
   void initialize(MachineFunction &MFParm);
 
+  // Perform peepholes that cannot be skipped.
+  // Some peephole simplifications are required for correctness and will not
+  // be skipped even if skipFunction(MF.getFunction()) returns true.
+  void unskipableSimplifyCode(void);
+
   // Perform peepholes.
   bool simplifyCode(void);
 
@@ -124,9 +131,14 @@ struct PPCMIPeephole : public MachineFunctionPass {
 
   // Main entry point for this pass.
   bool runOnMachineFunction(MachineFunction &MF) override {
+    initialize(MF);
+    // FIXME: This introduces another complete traversal of the instructions
+    // in the function in the common case (function is not skipped). Although
+    // this is less than ideal for compile time, this code will go away once
+    // our PC-Rel implementation is complete.
+    unskipableSimplifyCode();
     if (skipFunction(MF.getFunction()))
       return false;
-    initialize(MF);
     return simplifyCode();
   }
 };
@@ -260,6 +272,41 @@ void PPCMIPeephole::UpdateTOCSaves(
   TOCSaves[MI] = Keep;
 }
 
+void PPCMIPeephole::unskipableSimplifyCode(void) {
+  // If this function has no uses of R2 there is nothing to do here.
+  if(MF->getRegInfo().use_empty(PPC::X2))
+    return;
+
+  // This is only for PCRelative calls.
+  if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) {
+    return;
+  }
+
+  // This function uses X2, so record that it uses the TOC base pointer and
+  // add an implicit use of the TOC register to every BL8_NOTOC call below.
+  PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+  FuncInfo->setUsesTOCBasePtr();
+  for (MachineBasicBlock &MBB : *MF) {
+    for (MachineInstr &MI : MBB) {
+      if (MI.getOpcode() == PPC::BL8_NOTOC) {
+        // At this point the BL8_NOTOC instruction is not really safe because it
+        // assumes that the caller does not need the TOC. It will be safe
+        // later once the full PC relative implementation is complete but it is
+        // not now.
+        // Here we are looking for X2. Since this is Pre-RA the only uses of X2
+        // would indicate the use of the TOC. We want to detect all uses of the
+        // TOC. Once the work is done we should not see any uses of the TOC.
+        // TODO: Once the implementation is complete this should be turned into
+        // an assert
+        Register Reg = MF->getSubtarget<PPCSubtarget>().getTOCPointerRegister();
+        MachineOperand MO = MachineOperand::CreateReg(Reg, false, true);
+        MI.addOperand(*MF, MO);
+        MI.setDesc(TII->get(PPC::BL8_NOP));
+        ++NumX2FoundForPCRel;
+      }
+    }
+  }
+}
+
 // Perform peephole optimizations.
 bool PPCMIPeephole::simplifyCode(void) {
   bool Simplified = false;

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4fcfb798b755..ae1aa66acc3a 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -153,7 +153,14 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     return CSR_SRV464_TLS_PE_SaveList;
 
   // On PPC64, we might need to save r2 (but only if it is not reserved).
-  bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
+  // We do not need to treat R2 as callee-saved when using PC-Relative calls
+  // because any direct uses of R2 will cause it to be reserved. If the function
+  // is a leaf or the only uses of R2 are implicit uses for calls, the calls
+  // will use the @notoc relocation which will cause this function to set the
+  // st_other bit to 1, thereby communicating to its caller that it arbitrarily
+  // clobbers the TOC.
+  bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2) &&
+                !Subtarget.isUsingPCRelativeCalls();
 
   // Cold calling convention CSRs.
   if (MF->getFunction().getCallingConv() == CallingConv::Cold) {

diff  --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 4f00fb17b1e8..e779abea02b8 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -41,8 +41,8 @@ def P9Model : SchedMachineModel {
   let CompleteModel = 1;
 
   // Do not support QPX (Quad Processing eXtension), SPE (Signal Procesing
-  // Engine) or prefixed instructions on Power 9.
-  let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs];
+  // Engine), prefixed instructions on Power 9 or PC relative mem ops.
+  let UnsupportedFeatures = [HasQPX, HasSPE, PrefixInstrs, PCRelativeMemops];
 
 }
 

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index b0c0f30a56ec..7819874b74ee 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -227,3 +227,8 @@ bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
 
 bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
 bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
+
+bool PPCSubtarget::isUsingPCRelativeCalls() const {
+  return isPPC64() && hasPCRelativeMemops() && isELFv2ABI() &&
+         CodeModel::Medium == getTargetMachine().getCodeModel();
+}

diff  --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index be061d9ce0a1..b7b09319e22e 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -333,6 +333,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
 
   bool is64BitELFABI() const { return  isSVR4ABI() && isPPC64(); }
   bool is32BitELFABI() const { return  isSVR4ABI() && !isPPC64(); }
+  bool isUsingPCRelativeCalls() const;
 
   /// Originally, this function return hasISEL(). Now we always enable it,
   /// but may expand the ISEL instruction later.

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
new file mode 100644
index 000000000000..01024d485731
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -0,0 +1,176 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names < %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-S
+
+@global_int = common dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local signext i32 @NoTOC() local_unnamed_addr {
+; CHECK-S-LABEL: NoTOC:
+; CHECK-S-NOT:     .localentry
+; CHECK-S:         li r3, 42
+; CHECK-S-NEXT:    blr
+entry:
+  ret i32 42
+}
+
+define dso_local signext i32 @AsmClobberX2(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: AsmClobberX2:
+; CHECK-S:         .localentry AsmClobberX2, 1
+; CHECK-S:         add r3, r4, r3
+; CHECK-S:         #APP
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    #NO_APP
+; CHECK-S:         blr
+entry:
+  %add = add nsw i32 %b, %a
+  tail call void asm sideeffect "nop", "~{r2}"()
+  ret i32 %add
+}
+
+; FIXME: This is actually a test case that shows a bug. On power9 and earlier
+;        this test should not compile. On later CPUs (like this test) the @toc
+;        should be replaced with @pcrel and we won't need R2 and so the problem
+;        goes away.
+define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: AsmClobberX2WithTOC:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep2@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep2@l
+; CHECK-S:         .localentry     AsmClobberX2WithTOC, .Lfunc_lep2-.Lfunc_gep2
+; CHECK-S:         #APP
+; CHECK-S-NEXT:    li r2, 0
+; CHECK-S-NEXT:    #NO_APP
+; CHECK-S-NEXT:    addis r5, r2, global_int@toc@ha
+; CHECK-S-NEXT:    lwz r5, global_int@toc@l(r5)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    add r3, r3, r5
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  tail call void asm sideeffect "li 2, 0", "~{r2}"()
+  %0 = load i32, i32* @global_int, align 4
+  %add1 = add nsw i32 %add, %0
+  ret i32 %add1
+}
+
+define dso_local signext i32 @AsmClobberX5(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: AsmClobberX5:
+; CHECK-S:         .localentry AsmClobberX5, 1
+; CHECK-S-NEXT:  # %bb.0: # %entry
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    #APP
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    #NO_APP
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  tail call void asm sideeffect "nop", "~{r5}"()
+  ret i32 %add
+}
+
+; Clobber all GPRs except R2.
+define dso_local signext i32 @AsmClobberNotR2(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: AsmClobberNotR2:
+; CHECK-S:         .localentry AsmClobberNotR2, 1
+; CHECK-S:         add r3, r4, r3
+; CHECK-S:         stw r3, -148(r1) # 4-byte Folded Spill
+; CHECK-S-NEXT:    #APP
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    #NO_APP
+; CHECK-S-NEXT:    lwz r3, -148(r1) # 4-byte Folded Reload
+; CHECK-S:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  tail call void asm sideeffect "nop", "~{r0},~{r1},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"()
+  ret i32 %add
+}
+
+; Increase register pressure enough to force the register allocator to
+; make use of R2.
+define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) local_unnamed_addr {
+; CHECK-S-LABEL: X2IsCallerSaved:
+; CHECK-S:         .localentry X2IsCallerSaved, 1
+; CHECK-S-NEXT:  # %bb.0: # %entry
+; CHECK-S-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-S-NEXT:    add r11, r4, r3
+; CHECK-S-NEXT:    subf r29, r9, r8
+; CHECK-S-NEXT:    add r9, r10, r9
+; CHECK-S-NEXT:    subf r10, r3, r10
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    mullw r3, r3, r11
+; CHECK-S-NEXT:    mullw r3, r3, r5
+; CHECK-S-NEXT:    subf r12, r5, r4
+; CHECK-S-NEXT:    mullw r3, r3, r6
+; CHECK-S-NEXT:    add r0, r6, r5
+; CHECK-S-NEXT:    mullw r3, r3, r12
+; CHECK-S-NEXT:    mullw r3, r3, r0
+; CHECK-S-NEXT:    mullw r3, r3, r7
+; CHECK-S-NEXT:    subf r2, r7, r6
+; CHECK-S-NEXT:    mullw r3, r3, r8
+; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-S-NEXT:    add r30, r8, r7
+; CHECK-S-NEXT:    mullw r3, r3, r2
+; CHECK-S-NEXT:    mullw r3, r3, r30
+; CHECK-S-NEXT:    mullw r3, r3, r29
+; CHECK-S-NEXT:    mullw r3, r3, r9
+; CHECK-S-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    mullw r3, r3, r10
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %sub = sub nsw i32 %b, %c
+  %add1 = add nsw i32 %d, %c
+  %sub2 = sub nsw i32 %d, %e
+  %add3 = add nsw i32 %f, %e
+  %sub4 = sub nsw i32 %f, %g
+  %add5 = add nsw i32 %h, %g
+  %sub6 = sub nsw i32 %h, %a
+  %mul = mul i32 %b, %a
+  %mul7 = mul i32 %mul, %add
+  %mul8 = mul i32 %mul7, %c
+  %mul9 = mul i32 %mul8, %d
+  %mul10 = mul i32 %mul9, %sub
+  %mul11 = mul i32 %mul10, %add1
+  %mul12 = mul i32 %mul11, %e
+  %mul13 = mul i32 %mul12, %f
+  %mul14 = mul i32 %mul13, %sub2
+  %mul15 = mul i32 %mul14, %add3
+  %mul16 = mul i32 %mul15, %sub4
+  %mul17 = mul i32 %mul16, %add5
+  %mul18 = mul i32 %mul17, %sub6
+  ret i32 %mul18
+}
+
+
+define dso_local signext i32 @UsesX2AsTOC() local_unnamed_addr {
+; CHECK-S-LABEL: UsesX2AsTOC:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep6@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep6@l
+; CHECK-S:       .localentry     UsesX2AsTOC, .Lfunc_lep6-.Lfunc_gep6
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    addis r3, r2, global_int@toc@ha
+; CHECK-S-NEXT:    lwa r3, global_int@toc@l(r3)
+; CHECK-S-NEXT:    blr
+entry:
+  %0 = load i32, i32* @global_int, align 4
+  ret i32 %0
+}
+
+
+define dso_local double @UsesX2AsConstPoolTOC() local_unnamed_addr {
+; CHECK-S-LABEL: UsesX2AsConstPoolTOC:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep7@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep7@l
+; CHECK-S:       .localentry     UsesX2AsConstPoolTOC, .Lfunc_lep7-.Lfunc_gep7
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-S-NEXT:    lfd f1, .LCPI7_0@toc@l(r3)
+; CHECK-S-NEXT:    blr
+entry:
+  ret double 0x404124A4EBDD334C
+}
+
+

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
new file mode 100644
index 000000000000..c145b5c4378d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names < %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-S
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names --filetype=obj < %s | \
+; RUN:   llvm-objdump -dr - | FileCheck %s --check-prefix=CHECK-O
+
+
+; CHECK-S-LABEL: caller
+; CHECK-S: bl callee@notoc
+; CHECK-S: blr
+
+; CHECK-O-LABEL: caller
+; CHECK-O: bl
+; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee
+; CHECK-O: blr
+define dso_local signext i32 @caller() local_unnamed_addr {
+entry:
+  %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)()
+  ret i32 %call
+}
+
+declare signext i32 @callee(...) local_unnamed_addr
+
+
+; Some calls can be considered External Symbols.
+; CHECK-S-LABEL: ExternalSymbol
+; CHECK-S: bl memcpy@notoc
+; CHECK-S: blr
+
+; CHECK-O-LABEL: ExternalSymbol
+; CHECK-O: bl
+; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy
+; CHECK-O: blr
+define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
+

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
new file mode 100644
index 000000000000..69b88531cfc1
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -0,0 +1,521 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names < %s \
+; RUN:   | FileCheck %s --check-prefix=CHECK-S
+
+@globalVar = common dso_local local_unnamed_addr global i32 0, align 4
+@externGlobalVar = external local_unnamed_addr global i32, align 4
+@indirectCall = common dso_local local_unnamed_addr global i32 (i32)* null, align 8
+
+; This function needs to remain as noinline.
+; The compiler needs to know this function is local but must be forced to call
+; it. The only thing we really need to check here is that st_other=0 and
+; so we make sure that there is no .localentry.
+define dso_local signext i32 @localCall(i32 signext %a) local_unnamed_addr #0 {
+; CHECK-S-LABEL: localCall:
+; CHECK-S-NOT:   .localentry
+; CHECK-S:         addi r3, r3, 5
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %a, 5
+  ret i32 %add
+}
+
+define dso_local signext i32 @DirectCallLocal1(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: DirectCallLocal1:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep1@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep1@l
+; CHECK-S:         .localentry     DirectCallLocal1, .Lfunc_lep1-.Lfunc_gep1
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl localCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addis r4, r2, globalVar@toc@ha
+; CHECK-S-NEXT:    lwz r4, globalVar@toc@l(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %call = tail call signext i32 @localCall(i32 signext %add)
+  %0 = load i32, i32* @globalVar, align 4
+  %mul = mul nsw i32 %0, %call
+  ret i32 %mul
+}
+
+define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: DirectCallLocal2:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep2@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep2@l
+; CHECK-S:         .localentry     DirectCallLocal2, .Lfunc_lep2-.Lfunc_gep2
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl localCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-S-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-S-NEXT:    lwz r4, 0(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %call = tail call signext i32 @localCall(i32 signext %add)
+  %0 = load i32, i32* @externGlobalVar, align 4
+  %mul = mul nsw i32 %0, %call
+  ret i32 %mul
+}
+
+define dso_local signext i32 @DirectCallLocalNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: DirectCallLocalNoGlobal:
+; CHECK-S:         .localentry DirectCallLocalNoGlobal, 1
+; CHECK-S-NEXT:  # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    .cfi_offset r30, -16
+; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -48(r1)
+; CHECK-S-NEXT:    mr r30, r4
+; CHECK-S-NEXT:    bl localCall@notoc
+; CHECK-S-NEXT:    add r3, r3, r30
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 48
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %call = tail call signext i32 @localCall(i32 signext %a)
+  %add = add nsw i32 %call, %b
+  ret i32 %add
+}
+
+define dso_local signext i32 @DirectCallExtern1(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: DirectCallExtern1:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep4@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep4@l
+; CHECK-S:         .localentry     DirectCallExtern1, .Lfunc_lep4-.Lfunc_gep4
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl externCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addis r4, r2, globalVar@toc@ha
+; CHECK-S-NEXT:    lwz r4, globalVar@toc@l(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %call = tail call signext i32 @externCall(i32 signext %add)
+  %0 = load i32, i32* @globalVar, align 4
+  %mul = mul nsw i32 %0, %call
+  ret i32 %mul
+}
+
+declare signext i32 @externCall(i32 signext) local_unnamed_addr
+
+define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: DirectCallExtern2:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep5@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep5@l
+; CHECK-S:         .localentry     DirectCallExtern2, .Lfunc_lep5-.Lfunc_gep5
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl externCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-S-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-S-NEXT:    lwz r4, 0(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %call = tail call signext i32 @externCall(i32 signext %add)
+  %0 = load i32, i32* @externGlobalVar, align 4
+  %mul = mul nsw i32 %0, %call
+  ret i32 %mul
+}
+
+define dso_local signext i32 @DirectCallExternNoGlobal(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: DirectCallExternNoGlobal:
+; CHECK-S:         .localentry DirectCallExternNoGlobal, 1
+; CHECK-S-NEXT:  # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    .cfi_offset r30, -16
+; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -48(r1)
+; CHECK-S-NEXT:    mr r30, r4
+; CHECK-S-NEXT:    bl externCall@notoc
+; CHECK-S-NEXT:    add r3, r3, r30
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 48
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %call = tail call signext i32 @externCall(i32 signext %a)
+  %add = add nsw i32 %call, %b
+  ret i32 %add
+}
+
+define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr {
+; CHECK-S-LABEL: TailCallLocal1:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep7@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep7@l
+; CHECK-S:         .localentry     TailCallLocal1, .Lfunc_lep7-.Lfunc_gep7
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    addis r4, r2, globalVar@toc@ha
+; CHECK-S-NEXT:    lwz r4, globalVar@toc@l(r4)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl localCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %0 = load i32, i32* @globalVar, align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @localCall(i32 signext %add)
+  ret i32 %call
+}
+
+define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr {
+; CHECK-S-LABEL: TailCallLocal2:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep8@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep8@l
+; CHECK-S:         .localentry     TailCallLocal2, .Lfunc_lep8-.Lfunc_gep8
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-S-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-S-NEXT:    lwz r4, 0(r4)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl localCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %0 = load i32, i32* @externGlobalVar, align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @localCall(i32 signext %add)
+  ret i32 %call
+}
+
+define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unnamed_addr {
+; CHECK-S-LABEL: TailCallLocalNoGlobal:
+; CHECK-S:         .localentry TailCallLocalNoGlobal, 1
+; CHECK-S-NEXT:  # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    bl localCall@notoc
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %call = tail call signext i32 @localCall(i32 signext %a)
+  ret i32 %call
+}
+
+define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr {
+; CHECK-S-LABEL: TailCallExtern1:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep10@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep10@l
+; CHECK-S:         .localentry     TailCallExtern1, .Lfunc_lep10-.Lfunc_gep10
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    addis r4, r2, globalVar@toc@ha
+; CHECK-S-NEXT:    lwz r4, globalVar@toc@l(r4)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl externCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %0 = load i32, i32* @globalVar, align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @externCall(i32 signext %add)
+  ret i32 %call
+}
+
+define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr {
+; CHECK-S-LABEL: TailCallExtern2:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep11@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep11@l
+; CHECK-S:         .localentry     TailCallExtern2, .Lfunc_lep11-.Lfunc_gep11
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-S-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-S-NEXT:    lwz r4, 0(r4)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    bl externCall
+; CHECK-S-NEXT:    nop
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %0 = load i32, i32* @externGlobalVar, align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @externCall(i32 signext %add)
+  ret i32 %call
+}
+
+define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnamed_addr {
+; CHECK-S-LABEL: TailCallExternNoGlobal:
+; CHECK-S:         .localentry TailCallExternNoGlobal, 1
+; CHECK-S-NEXT:  # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    bl externCall@notoc
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %call = tail call signext i32 @externCall(i32 signext %a)
+  ret i32 %call
+}
+
+define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: IndirectCall1:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep13@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep13@l
+; CHECK-S:         .localentry     IndirectCall1, .Lfunc_lep13-.Lfunc_gep13
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    std r2, 24(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    addis r5, r2, indirectCall@toc@ha
+; CHECK-S-NEXT:    ld r12, indirectCall@toc@l(r5)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    mtctr r12
+; CHECK-S-NEXT:    bctrl
+; CHECK-S-NEXT:    ld 2, 24(r1)
+; CHECK-S-NEXT:    addis r4, r2, globalVar@toc@ha
+; CHECK-S-NEXT:    lwz r4, globalVar@toc@l(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8
+  %call = tail call signext i32 %0(i32 signext %add)
+  %1 = load i32, i32* @globalVar, align 4
+  %mul = mul nsw i32 %1, %call
+  ret i32 %mul
+}
+
+define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) local_unnamed_addr {
+; CHECK-S-LABEL: IndirectCall2:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep14@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep14@l
+; CHECK-S:         .localentry     IndirectCall2, .Lfunc_lep14-.Lfunc_gep14
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    std r2, 24(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    addis r5, r2, indirectCall@toc@ha
+; CHECK-S-NEXT:    ld r12, indirectCall@toc@l(r5)
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    mtctr r12
+; CHECK-S-NEXT:    bctrl
+; CHECK-S-NEXT:    ld 2, 24(r1)
+; CHECK-S-NEXT:    addis r4, r2, .LC0@toc@ha
+; CHECK-S-NEXT:    ld r4, .LC0@toc@l(r4)
+; CHECK-S-NEXT:    lwz r4, 0(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %0 = load i32 (i32)*, i32 (i32)** @indirectCall, align 8
+  %call = tail call signext i32 %0(i32 signext %add)
+  %1 = load i32, i32* @externGlobalVar, align 4
+  %mul = mul nsw i32 %1, %call
+  ret i32 %mul
+}
+
+define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr {
+; CHECK-S-LABEL: IndirectCall3:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep15@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep15@l
+; CHECK-S:         .localentry     IndirectCall3, .Lfunc_lep15-.Lfunc_gep15
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    std r2, 24(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    add r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    mtctr r5
+; CHECK-S-NEXT:    mr r12, r5
+; CHECK-S-NEXT:    bctrl
+; CHECK-S-NEXT:    ld 2, 24(r1)
+; CHECK-S-NEXT:    addis r4, r2, globalVar@toc@ha
+; CHECK-S-NEXT:    lwz r4, globalVar@toc@l(r4)
+; CHECK-S-NEXT:    mullw r3, r4, r3
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %add = add nsw i32 %b, %a
+  %call = tail call signext i32 %call_param(i32 signext %add)
+  %0 = load i32, i32* @globalVar, align 4
+  %mul = mul nsw i32 %0, %call
+  ret i32 %mul
+}
+
+define dso_local signext i32 @IndirectCallNoGlobal(i32 signext %a, i32 signext %b, i32 (i32)* nocapture %call_param) local_unnamed_addr {
+; CHECK-S-LABEL: IndirectCallNoGlobal:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep16@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep16@l
+; CHECK-S:         .localentry     IndirectCallNoGlobal, .Lfunc_lep16-.Lfunc_gep16
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    .cfi_offset r30, -16
+; CHECK-S-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -48(r1)
+; CHECK-S-NEXT:    mtctr r5
+; CHECK-S-NEXT:    mr r12, r5
+; CHECK-S-NEXT:    std r2, 24(r1)
+; CHECK-S-NEXT:    mr r30, r4
+; CHECK-S-NEXT:    bctrl
+; CHECK-S-NEXT:    ld 2, 24(r1)
+; CHECK-S-NEXT:    add r3, r3, r30
+; CHECK-S-NEXT:    extsw r3, r3
+; CHECK-S-NEXT:    addi r1, r1, 48
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %call = tail call signext i32 %call_param(i32 signext %a)
+  %add = add nsw i32 %call, %b
+  ret i32 %add
+}
+
+define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr {
+; CHECK-S-LABEL: IndirectCallOnly:
+; CHECK-S:         addis r2, r12, .TOC.-.Lfunc_gep17@ha
+; CHECK-S-NEXT:    addi r2, r2, .TOC.-.Lfunc_gep17@l
+; CHECK-S:         .localentry     IndirectCallOnly, .Lfunc_lep17-.Lfunc_gep17
+; CHECK-S:       # %bb.0: # %entry
+; CHECK-S-NEXT:    mflr r0
+; CHECK-S-NEXT:    std r0, 16(r1)
+; CHECK-S-NEXT:    stdu r1, -32(r1)
+; CHECK-S-NEXT:    std r2, 24(r1)
+; CHECK-S-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-S-NEXT:    .cfi_offset lr, 16
+; CHECK-S-NEXT:    mtctr r4
+; CHECK-S-NEXT:    mr r12, r4
+; CHECK-S-NEXT:    bctrl
+; CHECK-S-NEXT:    ld 2, 24(r1)
+; CHECK-S-NEXT:    addi r1, r1, 32
+; CHECK-S-NEXT:    ld r0, 16(r1)
+; CHECK-S-NEXT:    mtlr r0
+; CHECK-S-NEXT:    blr
+entry:
+  %call = tail call signext i32 %call_param(i32 signext %a)
+  ret i32 %call
+}
+
+attributes #0 = { noinline }
+

diff  --git a/llvm/test/MC/PowerPC/ppc64-localentry-error1.s b/llvm/test/MC/PowerPC/ppc64-localentry-error1.s
index c028da8ce619..3c54606bd15a 100644
--- a/llvm/test/MC/PowerPC/ppc64-localentry-error1.s
+++ b/llvm/test/MC/PowerPC/ppc64-localentry-error1.s
@@ -1,11 +1,11 @@
 
-# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
+# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
 # RUN: FileCheck < %t %s
-# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
+# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
 # RUN: FileCheck < %t %s
 
 sym:
 	.localentry sym, 123
 
-# CHECK: LLVM ERROR: .localentry expression cannot be encoded.
+# CHECK: error: .localentry expression is not a valid power of 2.
 

diff  --git a/llvm/test/MC/PowerPC/ppc64-localentry-error2.s b/llvm/test/MC/PowerPC/ppc64-localentry-error2.s
index 89a30ee4c8ac..8603dfb37c63 100644
--- a/llvm/test/MC/PowerPC/ppc64-localentry-error2.s
+++ b/llvm/test/MC/PowerPC/ppc64-localentry-error2.s
@@ -1,12 +1,12 @@
 
-# RUN: not --crash llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
+# RUN: not llvm-mc -triple powerpc64-unknown-unknown -filetype=obj < %s 2> %t
 # RUN: FileCheck < %t %s
-# RUN: not --crash llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
+# RUN: not llvm-mc -triple powerpc64le-unknown-unknown -filetype=obj < %s 2> %t
 # RUN: FileCheck < %t %s
 
 	.globl remote_sym
 sym:
 	.localentry sym, remote_sym
 
-# CHECK: LLVM ERROR: .localentry expression must be absolute.
+# CHECK: error: .localentry expression must be absolute.
 


        


More information about the llvm-commits mailing list