[llvm] a60251d - [PowerPC] Add linker opt for PC Relative GOT indirect accesses

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 22 07:14:20 PDT 2020


Author: Stefan Pintilie
Date: 2020-07-22T09:08:23-05:00
New Revision: a60251d739b97d506bbb9ea71d5f8ac9a3c155ce

URL: https://github.com/llvm/llvm-project/commit/a60251d739b97d506bbb9ea71d5f8ac9a3c155ce
DIFF: https://github.com/llvm/llvm-project/commit/a60251d739b97d506bbb9ea71d5f8ac9a3c155ce.diff

LOG: [PowerPC] Add linker opt for PC Relative GOT indirect accesses

A linker optimization is available on PowerPC for GOT indirect PCRelative loads.

The idea is that we can mark a usual GOT indirect load:

pld 3, vec@got@pcrel(0), 1
lwa 3, 4(3)

With a relocation to say that if we don't need to go through the GOT we can let
the linker further optimize this and replace a load with a nop.

  pld 3, vec@got@pcrel(0), 1
.Lpcrel1:
.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
  lwa 3, 4(3)

This patch adds the logic that allows the compiler to add the R_PPC64_PCREL_OPT.

Reviewers: nemanjai, lei, hfinkel, sfertile, efriedma, tstellar, grosbach

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D79864

Added: 
    llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll

Modified: 
    llvm/include/llvm/MC/MCExpr.h
    llvm/lib/MC/MCExpr.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
    llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
    llvm/lib/Target/PowerPC/PPC.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
    llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
    llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
    llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
    llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
    llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
    llvm/test/CodeGen/PowerPC/pcrel.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 803c0d443bee..a424957bb0b2 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -302,6 +302,7 @@ class MCSymbolRefExpr : public MCExpr {
     VK_PPC_TLSLD,           // symbol@tlsld
     VK_PPC_LOCAL,           // symbol@local
     VK_PPC_NOTOC,           // symbol@notoc
+    VK_PPC_PCREL_OPT,       // .reloc expr, R_PPC64_PCREL_OPT, expr
 
     VK_COFF_IMGREL32, // symbol@imgrel (image-relative)
 

diff  --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 9b78eca35caa..11fd32e48a4a 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -324,6 +324,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_PPC_TLSLD: return "tlsld";
   case VK_PPC_LOCAL: return "local";
   case VK_PPC_NOTOC: return "notoc";
+  case VK_PPC_PCREL_OPT: return "<<invalid>>";
   case VK_COFF_IMGREL32: return "IMGREL";
   case VK_Hexagon_LO16: return "LO16";
   case VK_Hexagon_HI16: return "HI16";

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 4373778cc96c..386d59266096 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -20,6 +20,7 @@
 
 
 #include "PPCELFStreamer.h"
+#include "PPCFixupKinds.h"
 #include "PPCInstrInfo.h"
 #include "PPCMCCodeEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -89,12 +90,33 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst,
   PPCMCCodeEmitter *Emitter =
       static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr());
 
+  // If the instruction is a part of the GOT to PC-Rel link time optimization
+  // instruction pair, return a value, otherwise return None. A true returned
+  // value means the instruction is the PLDpc and a false value means it is
+  // the user instruction.
+  Optional<bool> IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI);
+
+  // User of the GOT-indirect address.
+  // For example, the load that will get the relocation as follows:
+  // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+  //  lwa 3, 4(3)
+  if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue())
+    emitGOTToPCRelReloc(Inst);
+
   // Special handling is only for prefixed instructions.
   if (!Emitter->isPrefixedInstruction(Inst)) {
     MCELFStreamer::emitInstruction(Inst, STI);
     return;
   }
   emitPrefixedInstruction(Inst, STI);
+
+  // Producer of the GOT-indirect address.
+  // For example, the prefixed load from the got that will get the label as
+  // follows:
+  //  pld 3, vec@got@pcrel(0), 1
+  // .Lpcrel1:
+  if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue())
+    emitGOTToPCRelLabel(Inst);
 }
 
 void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
@@ -103,6 +125,102 @@ void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
   MCELFStreamer::emitLabel(Symbol);
 }
 
+// This linker time GOT PC Relative optimization relocation will look like this:
+//   pld <reg> symbol@got@pcrel
+// <Label###>:
+//   .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8)
+//   load <loadedreg>, 0(<reg>)
+// The reason we place the label after the PLDpc instruction is that there
+// may be an alignment nop before it since prefixed instructions must not
+// cross a 64-byte boundary (please see
+// PPCELFStreamer::emitPrefixedInstruction()). When referring to the
+// label, we subtract the width of a prefixed instruction (8 bytes) to ensure
+// we refer to the PLDpc.
+void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) {
+  // Get the last operand which contains the symbol.
+  const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+  assert(Operand.isExpr() && "Expecting an MCExpr.");
+  // Cast the last operand to MCSymbolRefExpr to get the symbol.
+  const MCExpr *Expr = Operand.getExpr();
+  const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+  assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+         "Expecting a symbol of type VK_PPC_PCREL_OPT");
+  MCSymbol *LabelSym =
+      getContext().getOrCreateSymbol(SymExpr->getSymbol().getName());
+  const MCExpr *LabelExpr = MCSymbolRefExpr::create(LabelSym, getContext());
+  const MCExpr *Eight = MCConstantExpr::create(8, getContext());
+  // SubExpr is just Label###-8
+  const MCExpr *SubExpr =
+      MCBinaryExpr::createSub(LabelExpr, Eight, getContext());
+  MCSymbol *CurrentLocation = getContext().createTempSymbol();
+  const MCExpr *CurrentLocationExpr =
+      MCSymbolRefExpr::create(CurrentLocation, getContext());
+  // SubExpr2 is .-(Label###-8)
+  const MCExpr *SubExpr2 =
+      MCBinaryExpr::createSub(CurrentLocationExpr, SubExpr, getContext());
+
+  MCDataFragment *DF = static_cast<MCDataFragment *>(LabelSym->getFragment());
+  assert(DF && "Expecting a valid data fragment.");
+  MCFixupKind FixupKind = static_cast<MCFixupKind>(FirstLiteralRelocationKind +
+                                                   ELF::R_PPC64_PCREL_OPT);
+  DF->getFixups().push_back(
+      MCFixup::create(LabelSym->getOffset() - 8, SubExpr2,
+                      FixupKind, Inst.getLoc()));
+  emitLabel(CurrentLocation, Inst.getLoc());
+}
+
+// Emit the label that immediately follows the PLDpc for a link time GOT PC Rel
+// optimization.
+void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) {
+  // Get the last operand which contains the symbol.
+  const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+  assert(Operand.isExpr() && "Expecting an MCExpr.");
+  // Cast the last operand to MCSymbolRefExpr to get the symbol.
+  const MCExpr *Expr = Operand.getExpr();
+  const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+  assert(SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+         "Expecting a symbol of type VK_PPC_PCREL_OPT");
+  MCSymbol *LabelSym =
+      getContext().getOrCreateSymbol(SymExpr->getSymbol().getName());
+  emitLabel(LabelSym, Inst.getLoc());
+}
+
+// This function checks if the parameter Inst is part of the setup for a link
+// time GOT PC Relative optimization. For example in this situation:
+// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)>
+//   <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// <MCInst SOME_LOAD <MCOperand Reg:22> <MCOperand Imm:0> <MCOperand Reg:282>
+//   <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// The above is a pair of such instructions and this function will not return
+// None for either one of them. In both cases we are looking for the last
+// operand <MCOperand Expr:(.Lpcrel@<<invalid>>)> which needs to be an MCExpr
+// and has the flag MCSymbolRefExpr::VK_PPC_PCREL_OPT. After that we just look
+// at the opcode and in the case of PLDpc we will return true. For the load
+// (or store) this function will return false indicating it has found the second
+// instruction in the pair.
+Optional<bool> llvm::isPartOfGOTToPCRelPair(const MCInst &Inst,
+                                            const MCSubtargetInfo &STI) {
+  // Need at least two operands.
+  if (Inst.getNumOperands() < 2)
+    return None;
+
+  unsigned LastOp = Inst.getNumOperands() - 1;
+  // The last operand needs to be an MCExpr and it needs to have a variant kind
+  // of VK_PPC_PCREL_OPT. If it does not satisfy these conditions it is not a
+  // link time GOT PC Rel opt instruction and we can ignore it and return None.
+  const MCOperand &Operand = Inst.getOperand(LastOp);
+  if (!Operand.isExpr())
+    return None;
+
+  // Check for the variant kind VK_PPC_PCREL_OPT in this expression.
+  const MCExpr *Expr = Operand.getExpr();
+  const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+  if (!SymExpr || SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT)
+    return None;
+
+  return (Inst.getOpcode() == PPC::PLDpc);
+}
+
 MCELFStreamer *llvm::createPPCELFStreamer(
     MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
     std::unique_ptr<MCObjectWriter> OW,

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index 51863232d071..f44200104f32 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -43,8 +43,15 @@ class PPCELFStreamer : public MCELFStreamer {
   void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
 private:
   void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+  void emitGOTToPCRelReloc(const MCInst &Inst);
+  void emitGOTToPCRelLabel(const MCInst &Inst);
 };
 
+// Check if the instruction Inst is part of a pair of instructions that make up
+// a link time GOT PC Rel optimization.
+Optional<bool> isPartOfGOTToPCRelPair(const MCInst &Inst,
+                                      const MCSubtargetInfo &STI);
+
 MCELFStreamer *createPPCELFStreamer(MCContext &Context,
                                     std::unique_ptr<MCAsmBackend> MAB,
                                     std::unique_ptr<MCObjectWriter> OW,

diff  --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 16da62a74b8c..222bf2fa8283 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -92,6 +92,36 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
     return;
   }
 
+  // Check if the last operand is an expression with the variant kind
+  // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization
+  // relocation and the .reloc directive needs to be added.
+  unsigned LastOp = MI->getNumOperands() - 1;
+  if (MI->getNumOperands() > 1) {
+    const MCOperand &Operand = MI->getOperand(LastOp);
+    if (Operand.isExpr()) {
+      const MCExpr *Expr = Operand.getExpr();
+      const MCSymbolRefExpr *SymExpr =
+          static_cast<const MCSymbolRefExpr *>(Expr);
+
+      if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) {
+        const MCSymbol &Symbol = SymExpr->getSymbol();
+        if (MI->getOpcode() == PPC::PLDpc) {
+          printInstruction(MI, Address, O);
+          O << "\n";
+          Symbol.print(O, &MAI);
+          O << ":";
+          return;
+        } else {
+          O << "\t.reloc ";
+          Symbol.print(O, &MAI);
+          O << "-8,R_PPC64_PCREL_OPT,.-(";
+          Symbol.print(O, &MAI);
+          O << "-8)\n";
+        }
+      }
+    }
+  }
+
   // Check for slwi/srwi mnemonics.
   if (MI->getOpcode() == PPC::RLWINM) {
     unsigned char SH = MI->getOperand(2).getImm();

diff  --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 7e0aa2c6061d..3106290442af 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -107,6 +107,10 @@ namespace llvm {
     /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
     MO_GOT_FLAG = 8,
 
+    // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
+    // PC Relative linker optimization.
+    MO_PCREL_OPT_FLAG = 16,
+
     /// The next are not flags but distinct values.
     MO_ACCESS_MASK = 0xf00,
 

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b822a803132..99e25bb130ce 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2283,7 +2283,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
       {MO_PLT, "ppc-plt"},
       {MO_PIC_FLAG, "ppc-pic"},
       {MO_PCREL_FLAG, "ppc-pcrel"},
-      {MO_GOT_FLAG, "ppc-got"}};
+      {MO_GOT_FLAG, "ppc-got"},
+      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}};
   return makeArrayRef(TargetFlags);
 }
 

diff  --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 236f98f32e18..1f51a55b1683 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -100,6 +100,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
         MIOpcode == PPC::BL8_NOTOC) {
       RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
     }
+    if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG)
+      RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT;
   }
 
   const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);

diff  --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4ea714ff15f7..65539166e5ef 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,8 +21,8 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -43,6 +43,46 @@ RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
                    cl::desc("Run pre-emit peephole optimizations."));
 
 namespace {
+
+static bool hasPCRelativeForm(MachineInstr &Use) {
+  switch (Use.getOpcode()) {
+  default:
+    return false;
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+  case PPC::LD:
+  case PPC::STD:
+  case PPC::LWA:
+  case PPC::LXSD:
+  case PPC::LXSSP:
+  case PPC::LXV:
+  case PPC::STXSD:
+  case PPC::STXSSP:
+  case PPC::STXV:
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::STFD:
+  case PPC::STFS:
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64:
+    return true;
+  }
+}
+
   class PPCPreEmitPeephole : public MachineFunctionPass {
   public:
     static char ID;
@@ -172,6 +212,135 @@ namespace {
       return !InstrsToErase.empty();
     }
 
+    // Check if this instruction is a PLDpc that is part of a GOT indirect
+    // access.
+    bool isGOTPLDpc(MachineInstr &Instr) {
+      if (Instr.getOpcode() != PPC::PLDpc)
+        return false;
+
+      // The result must be a register.
+      const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
+      if (!LoadedAddressReg.isReg())
+        return false;
+
+      // Make sure that this is a global symbol.
+      const MachineOperand &SymbolOp = Instr.getOperand(1);
+      if (!SymbolOp.isGlobal())
+        return false;
+
+      // Finally return true only if the GOT flag is present.
+      return (SymbolOp.getTargetFlags() & PPCII::MO_GOT_FLAG);
+    }
+
+    bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
+      MachineFunction *MF = MBB.getParent();
+      // Add this linker opt only if we are using PC Relative memops.
+      if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+        return false;
+
+      // Struct to keep track of one def/use pair for a GOT indirect access.
+      struct GOTDefUsePair {
+        MachineBasicBlock::iterator DefInst;
+        MachineBasicBlock::iterator UseInst;
+        Register DefReg;
+        Register UseReg;
+        bool StillValid;
+      };
+      // Vector of def/use pairs in this basic block.
+      SmallVector<GOTDefUsePair, 4> CandPairs;
+      SmallVector<GOTDefUsePair, 4> ValidPairs;
+      bool MadeChange = false;
+
+      // Run through all of the instructions in the basic block and try to
+      // collect potential pairs of GOT indirect access instructions.
+      for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+        // Look for the initial GOT indirect load.
+        if (isGOTPLDpc(*BBI)) {
+          GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
+                                    BBI->getOperand(0).getReg(),
+                                    PPC::NoRegister, true};
+          CandPairs.push_back(CurrentPair);
+          continue;
+        }
+
+        // We haven't encountered any new PLD instructions, nothing to check.
+        if (CandPairs.empty())
+          continue;
+
+        // Run through the candidate pairs and see if any of the registers
+        // defined in the PLD instructions are used by this instruction.
+        // Note: the size of CandPairs can change in the loop.
+        for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
+          GOTDefUsePair &Pair = CandPairs[Idx];
+          // The instruction does not use or modify this PLD's def reg,
+          // ignore it.
+          if (!BBI->readsRegister(Pair.DefReg, TRI) &&
+              !BBI->modifiesRegister(Pair.DefReg, TRI))
+            continue;
+
+          // The use needs to be used in the address computation and not
+          // as the register being stored for a store.
+          const MachineOperand *UseOp =
+              hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
+
+          // Check for a valid use.
+          if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
+              UseOp->isUse() && UseOp->isKill()) {
+            Pair.UseInst = BBI;
+            Pair.UseReg = BBI->getOperand(0).getReg();
+            ValidPairs.push_back(Pair);
+          }
+          CandPairs.erase(CandPairs.begin() + Idx);
+        }
+      }
+
+      // Go through all of the pairs and check for any more valid uses.
+      for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
+        // We shouldn't be here if we don't have a valid pair.
+        assert(Pair->UseInst.isValid() && Pair->StillValid &&
+               "Kept an invalid def/use pair for GOT PCRel opt");
+        // We have found a potential pair. Search through the instructions
+        // between the def and the use to see if it is valid to mark this as a
+        // linker opt.
+        MachineBasicBlock::iterator BBI = Pair->DefInst;
+        ++BBI;
+        for (; BBI != Pair->UseInst; ++BBI) {
+          if (BBI->readsRegister(Pair->UseReg, TRI) ||
+              BBI->modifiesRegister(Pair->UseReg, TRI)) {
+            Pair->StillValid = false;
+            break;
+          }
+        }
+
+        if (!Pair->StillValid)
+          continue;
+
+        // The load/store instruction that uses the address from the PLD will
+        // either use a register (for a store) or define a register (for the
+        // load). That register will be added as an implicit def to the PLD
+        // and as an implicit use on the second memory op. This is a precaution
+        // to prevent future passes from using that register between the two
+        // instructions.
+        MachineOperand ImplDef =
+            MachineOperand::CreateReg(Pair->UseReg, true, true);
+        MachineOperand ImplUse =
+            MachineOperand::CreateReg(Pair->UseReg, false, true);
+        Pair->DefInst->addOperand(ImplDef);
+        Pair->UseInst->addOperand(ImplUse);
+
+        // Create the symbol.
+        MCContext &Context = MF->getContext();
+        MCSymbol *Symbol =
+            Context.createTempSymbol(Twine("pcrel"), false, false);
+        MachineOperand PCRelLabel =
+            MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+        Pair->DefInst->addOperand(*MF, PCRelLabel);
+        Pair->UseInst->addOperand(*MF, PCRelLabel);
+        MadeChange |= true;
+      }
+      return MadeChange;
+    }
+
     bool runOnMachineFunction(MachineFunction &MF) override {
       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
         // Remove UNENCODED_NOP even when this pass is disabled.
@@ -192,6 +361,7 @@ namespace {
       SmallVector<MachineInstr *, 4> InstrsToErase;
       for (MachineBasicBlock &MBB : MF) {
         Changed |= removeRedundantLIs(MBB, TRI);
+        Changed |= addLinkerOpt(MBB, TRI);
         for (MachineInstr &MI : MBB) {
           unsigned Opc = MI.getOpcode();
           if (Opc == PPC::UNENCODED_NOP) {

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index 1bad2e726341..0a4f2f38c816 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -67,6 +67,8 @@ define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) l
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    bl localCall@notoc
 ; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel:
+; CHECK-S-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -150,6 +152,8 @@ define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b)
 ; CHECK-S-NEXT:    extsw r3, r3
 ; CHECK-S-NEXT:    bl externCall@notoc
 ; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel0:
+; CHECK-S-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -212,6 +216,8 @@ define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr
 ; CHECK-S:         .localentry     TailCallLocal2
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S:         pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel1:
+; CHECK-S-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -254,6 +260,8 @@ define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr
 ; CHECK-S:         .localentry     TailCallExtern2
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S:         pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel2:
+; CHECK-S-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    add r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3
@@ -319,6 +327,8 @@ define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca
 ; CHECK-S-NEXT:    mtctr r12
 ; CHECK-S-NEXT:    bctrl
 ; CHECK-S-NEXT:    pld r4, externGlobalVar@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel3:
+; CHECK-S-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
 ; CHECK-S-NEXT:    lwz r4, 0(r4)
 ; CHECK-S-NEXT:    mullw r3, r4, r3
 ; CHECK-S-NEXT:    extsw r3, r3

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
index 625d91d2eb5b..4d61b66d3bb7 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
@@ -23,12 +23,16 @@ define dso_local signext i32 @ReadGlobalVarChar() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalVarChar:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, valChar@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel:
+; LE-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
 ; LE-NEXT:    lbz r3, 0(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalVarChar:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, valChar@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel:
+; BE-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
 ; BE-NEXT:    lbz r3, 0(r3)
 ; BE-NEXT:    blr
 entry:
@@ -60,12 +64,16 @@ define dso_local signext i32 @ReadGlobalVarShort() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalVarShort:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, valShort@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel0:
+; LE-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; LE-NEXT:    lha r3, 0(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalVarShort:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, valShort@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel0:
+; BE-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; BE-NEXT:    lha r3, 0(r3)
 ; BE-NEXT:    blr
 entry:
@@ -97,12 +105,16 @@ define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalVarInt:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, valInt@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel1:
+; LE-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; LE-NEXT:    lwa r3, 0(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalVarInt:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, valInt@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel1:
+; BE-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; BE-NEXT:    lwa r3, 0(r3)
 ; BE-NEXT:    blr
 entry:
@@ -133,12 +145,16 @@ define dso_local signext i32 @ReadGlobalVarUnsigned() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalVarUnsigned:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, valUnsigned@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel2:
+; LE-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; LE-NEXT:    lwa r3, 0(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalVarUnsigned:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, valUnsigned@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel2:
+; BE-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
 ; BE-NEXT:    lwa r3, 0(r3)
 ; BE-NEXT:    blr
 entry:
@@ -169,12 +185,16 @@ define dso_local signext i32 @ReadGlobalVarLong() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalVarLong:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, valLong@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel3:
+; LE-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
 ; LE-NEXT:    lwa r3, 0(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalVarLong:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, valLong@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel3:
+; BE-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
 ; BE-NEXT:    lwa r3, 4(r3)
 ; BE-NEXT:    blr
 entry:
@@ -206,12 +226,16 @@ define dso_local i32* @ReadGlobalPtr() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalPtr:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, ptr@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel4:
+; LE-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
 ; LE-NEXT:    ld r3, 0(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalPtr:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, ptr@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel4:
+; BE-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
 ; BE-NEXT:    ld r3, 0(r3)
 ; BE-NEXT:    blr
 entry:
@@ -223,7 +247,9 @@ define dso_local void @WriteGlobalPtr() local_unnamed_addr  {
 ; LE-LABEL: WriteGlobalPtr:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, ptr@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel5:
 ; LE-NEXT:    li r4, 3
+; LE-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; LE-NEXT:    ld r3, 0(r3)
 ; LE-NEXT:    stw r4, 0(r3)
 ; LE-NEXT:    blr
@@ -231,7 +257,9 @@ define dso_local void @WriteGlobalPtr() local_unnamed_addr  {
 ; BE-LABEL: WriteGlobalPtr:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, ptr@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel5:
 ; BE-NEXT:    li r4, 3
+; BE-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; BE-NEXT:    ld r3, 0(r3)
 ; BE-NEXT:    stw r4, 0(r3)
 ; BE-NEXT:    blr
@@ -259,12 +287,16 @@ define dso_local signext i32 @ReadGlobalArray() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalArray:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, array@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel6:
+; LE-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
 ; LE-NEXT:    lwa r3, 12(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalArray:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, array@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel6:
+; BE-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
 ; BE-NEXT:    lwa r3, 12(r3)
 ; BE-NEXT:    blr
 entry:
@@ -295,12 +327,16 @@ define dso_local signext i32 @ReadGlobalStruct() local_unnamed_addr  {
 ; LE-LABEL: ReadGlobalStruct:
 ; LE:       # %bb.0: # %entry
 ; LE-NEXT:    pld r3, structure@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel7:
+; LE-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
 ; LE-NEXT:    lwa r3, 4(r3)
 ; LE-NEXT:    blr
 ;
 ; BE-LABEL: ReadGlobalStruct:
 ; BE:       # %bb.0: # %entry
 ; BE-NEXT:    pld r3, structure@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel7:
+; BE-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
 ; BE-NEXT:    lwa r3, 4(r3)
 ; BE-NEXT:    blr
 entry:
@@ -332,6 +368,8 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr  {
 ; LE:         .localentry ReadFuncPtr, 1
 ; LE-NEXT:  # %bb.0: # %entry
 ; LE-NEXT:    pld r3, ptrfunc@got@pcrel(0), 1
+; LE-NEXT:  .Lpcrel8:
+; LE-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
 ; LE-NEXT:    ld r12, 0(r3)
 ; LE-NEXT:    mtctr r12
 ; LE-NEXT:    bctr
@@ -341,6 +379,8 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr  {
 ; BE:         .localentry ReadFuncPtr, 1
 ; BE-NEXT:  # %bb.0: # %entry
 ; BE-NEXT:    pld r3, ptrfunc@got@pcrel(0), 1
+; BE-NEXT:  .Lpcrel8:
+; BE-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
 ; BE-NEXT:    ld r12, 0(r3)
 ; BE-NEXT:    mtctr r12
 ; BE-NEXT:    bctr

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
new file mode 100644
index 000000000000..e878e7439911
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN:   < %s | FileCheck %s
+
+; On future CPU with PC Relative addressing enabled, it is possible for the
+; linker to optimize GOT indirect accesses. In order for the linker to do this
+; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
+; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
+; correctly.
+
+@input8 = external local_unnamed_addr global i8, align 1
+@output8 = external local_unnamed_addr global i8, align 1
+@input16 = external local_unnamed_addr global i16, align 2
+@output16 = external local_unnamed_addr global i16, align 2
+@input32 = external global i32, align 4
+@output32 = external local_unnamed_addr global i32, align 4
+@input64 = external local_unnamed_addr global i64, align 8
+@output64 = external local_unnamed_addr global i64, align 8
+@input128 = external local_unnamed_addr global i128, align 16
+@output128 = external local_unnamed_addr global i128, align 16
+@inputf32 = external local_unnamed_addr global float, align 4
+@outputf32 = external local_unnamed_addr global float, align 4
+@inputf64 = external local_unnamed_addr global double, align 8
+@outputf64 = external local_unnamed_addr global double, align 8
+@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
+@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
+@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
+@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
+@ArrayIn = external global [10 x i32], align 4
+@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
+@IntPtrIn = external local_unnamed_addr global i32*, align 8
+@IntPtrOut = external local_unnamed_addr global i32*, align 8
+@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8
+@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
+
+define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel:
+; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
+; CHECK-NEXT:    lbz r3, 0(r3)
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3, and that register is defined by lbz r3, 0(r3),
+; which sits between the pld and the stb.
+; CHECK-NEXT:    stb r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i8, i8* @input8, align 1
+  store i8 %0, i8* @output8, align 1
+  ret void
+}
+
+define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel0:
+; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
+; CHECK-NEXT:    lhz r3, 0(r3)
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3, and that register is defined by lhz r3, 0(r3),
+; which sits between the pld and the sth.
+; CHECK-NEXT:    sth r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i16, i16* @input16, align 2
+  store i16 %0, i16* @output16, align 2
+  ret void
+}
+
+define dso_local void @ReadWrite32() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel1:
+; CHECK-NEXT:    pld r4, output32@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+; CHECK-NEXT:    lwz r3, 0(r3)
+; CHECK-NEXT:    stw r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @input32, align 4
+  store i32 %0, i32* @output32, align 4
+  ret void
+}
+
+define dso_local void @ReadWrite64() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input64@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel2:
+; CHECK-NEXT:    pld r4, output64@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i64, i64* @input64, align 8
+  store i64 %0, i64* @output64, align 8
+  ret void
+}
+
+; FIXME: we should always convert X-Form instructions that use
+; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
+define dso_local void @ReadWrite128() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite128:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input128@got@pcrel(0), 1
+; CHECK-NEXT:    lxvx vs0, 0, r3
+; CHECK-NEXT:    pld r3, output128@got@pcrel(0), 1
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i128, i128* @input128, align 16
+  store i128 %0, i128* @output128, align 16
+  ret void
+}
+
+define dso_local void @ReadWritef32() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWritef32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputf32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel3:
+; CHECK-NEXT:    xxspltidp vs1, 1078103900
+; CHECK-NEXT:    .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+; CHECK-NEXT:    lfs f0, 0(r3)
+; CHECK-NEXT:    pld r3, outputf32@got@pcrel(0), 1
+; CHECK-NEXT:    xsaddsp f0, f0, f1
+; CHECK-NEXT:    stfs f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load float, float* @inputf32, align 4
+  %add = fadd float %0, 0x400851EB80000000
+  store float %add, float* @outputf32, align 4
+  ret void
+}
+
+define dso_local void @ReadWritef64() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWritef64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel4:
+; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+; CHECK-NEXT:    lfd f0, 0(r3)
+; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
+; CHECK-NEXT:    xsadddp f0, f0, f1
+; CHECK-NEXT:    stfd f0, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load double, double* @inputf64, align 8
+  %add = fadd double %0, 6.800000e+00
+  store double %add, double* @outputf64, align 8
+  ret void
+}
+
+; FIXME: we should always convert X-Form instructions that use
+; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
+define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteVi32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
+; CHECK-NEXT:    li r4, 45
+; CHECK-NEXT:    mtfprwz f1, r4
+; CHECK-NEXT:    lxvx vs0, 0, r3
+; CHECK-NEXT:    pld r3, outputVi32@got@pcrel(0), 1
+; CHECK-NEXT:    xxinsertw vs0, vs1, 8
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %vecins = insertelement <4 x i32> %0, i32 45, i32 1
+  store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16
+  ret void
+}
+
+define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteVi64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputVi64@got@pcrel(0), 1
+; CHECK-NEXT:    lxvx vs0, 0, r3
+; CHECK-NEXT:    pld r3, outputVi64@got@pcrel(0), 1
+; CHECK-NEXT:    stxvx vs0, 0, r3
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16
+  store <2 x i64> %0, <2 x i64>* @outputVi64, align 16
+  ret void
+}
+
+define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteArray:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel5:
+; CHECK-NEXT:    pld r4, ArrayOut@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+; CHECK-NEXT:    lwz r3, 28(r3)
+; CHECK-NEXT:    addi r3, r3, 42
+; CHECK-NEXT:    stw r3, 8(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4
+  %add = add nsw i32 %0, 42
+  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayOut, i64 0, i64 2), align 4
+  ret void
+}
+
+define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteSameArray:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, ArrayIn@got@pcrel(0), 1
+; CHECK-NEXT:    lwz r4, 12(r3)
+; CHECK-NEXT:    addi r4, r4, 8
+; CHECK-NEXT:    stw r4, 24(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4
+  %add = add nsw i32 %0, 8
+  store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4
+  ret void
+}
+
+define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteIntPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, IntPtrIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel6:
+; CHECK-NEXT:    pld r4, IntPtrOut@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel7:
+; CHECK-NEXT:    .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+; CHECK-NEXT:    ld r4, 0(r4)
+; CHECK-NEXT:    lwz r5, 216(r3)
+; CHECK-NEXT:    lwz r3, 48(r3)
+; CHECK-NEXT:    add r3, r3, r5
+; CHECK-NEXT:    stw r3, 136(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32*, i32** @IntPtrIn, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %0, i64 54
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12
+  %2 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %2, %1
+  %3 = load i32*, i32** @IntPtrOut, align 8
+  %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34
+  store i32 %add, i32* %arrayidx2, align 4
+  ret void
+}
+
+define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteFuncPtr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel8:
+; CHECK-NEXT:    pld r4, FuncPtrOut@got@pcrel(0), 1
+; CHECK-NEXT:    .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+; CHECK-NEXT:    ld r3, 0(r3)
+; CHECK-NEXT:    std r3, 0(r4)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8
+  store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8
+  ret void
+}
+
+define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
+; CHECK-LABEL: FuncPtrCopy:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, FuncPtrOut@got@pcrel(0), 1
+; CHECK-NEXT:    pld r4, Callee@got@pcrel(0), 1
+; CHECK-NEXT:    std r4, 0(r3)
+; CHECK-NEXT:    blr
+entry:
+  store void (...)* @Callee, void (...)** @FuncPtrOut, align 8
+  ret void
+}
+
+declare void @Callee(...)
+
+define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
+; CHECK-LABEL: FuncPtrCall:
+; CHECK:         .localentry FuncPtrCall, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel9:
+; CHECK-NEXT:    .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+; CHECK-NEXT:    ld r12, 0(r3)
+; CHECK-NEXT:    mtctr r12
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:    #TC_RETURNr8 ctr 0
+entry:
+  %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8
+  tail call void %0()
+  ret void
+}
+
+define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadVecElement:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, inputVi32@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel10:
+; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+; CHECK-NEXT:    lwa r3, 4(r3)
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %vecext = extractelement <4 x i32> %0, i32 1
+  ret i32 %vecext
+}
+
+define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
+; CHECK-LABEL: VecMultiUse:
+; CHECK:         .localentry VecMultiUse, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    pld r30, inputVi32@got@pcrel(0), 1
+; CHECK-NEXT:    lwz r29, 4(r30)
+; CHECK-NEXT:    bl Callee@notoc
+; CHECK-NEXT:    lwz r3, 8(r30)
+; CHECK-NEXT:    add r29, r3, r29
+; CHECK-NEXT:    bl Callee@notoc
+; CHECK-NEXT:    lwz r3, 0(r30)
+; CHECK-NEXT:    add r3, r29, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  tail call void bitcast (void (...)* @Callee to void ()*)()
+  %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %2 = extractelement <4 x i32> %1, i32 2
+  %3 = extractelement <4 x i32> %0, i64 1
+  %4 = add nsw i32 %2, %3
+  tail call void bitcast (void (...)* @Callee to void ()*)()
+  %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+  %vecext2 = extractelement <4 x i32> %5, i32 0
+  %add3 = add nsw i32 %4, %vecext2
+  ret i32 %add3
+}
+
+define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
+; CHECK-LABEL: UseAddr:
+; CHECK:         .localentry UseAddr, 1
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    pld r4, ArrayIn@got@pcrel(0), 1
+; CHECK-NEXT:    lwz r5, 16(r4)
+; CHECK-NEXT:    add r30, r5, r3
+; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    bl getAddr@notoc
+; CHECK-NEXT:    add r3, r30, r3
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4
+  %add = add nsw i32 %0, %a
+  %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0))
+  %add1 = add nsw i32 %add, %call
+  ret i32 %add1
+}
+
+declare signext i32 @getAddr(i32*) local_unnamed_addr
+
+define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr #0 {
+; CHECK-LABEL: AddrTaken32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pld r3, input32@got@pcrel(0), 1
+; CHECK-NEXT:    blr
+entry:
+  ret i32* @input32
+}
+
+attributes #0 = { nounwind }

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
index 5c2eb0d5ec46..44d3f7a50e9b 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
@@ -50,12 +50,15 @@ define dso_local signext i32 @getElementExtern4() local_unnamed_addr {
 ; CHECK-S-LABEL: getElementExtern4:
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    pld r3, array1@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel:
+; CHECK-S-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
 ; CHECK-S-NEXT:    lwa r3, 16(r3)
 ; CHECK-S-NEXT:    blr
 ; CHECK-O-LABEL: <getElementExtern4>:
 ; CHECK-O:         pld 3, 0(0), 1
 ; CHECK-O-NEXT:      R_PPC64_GOT_PCREL34  array1
-; CHECK-O-NEXT:    lwa 3, 16(3)
+; CHECK-O-NEXT:      R_PPC64_PCREL_OPT *ABS*+0x8
+; CHECK-O:         lwa 3, 16(3)
 ; CHECK-O-NEXT:    blr
 entry:
   %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 4), align 4
@@ -66,12 +69,15 @@ define dso_local signext i32 @getElementExternNegative() local_unnamed_addr {
 ; CHECK-S-LABEL: getElementExternNegative:
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    pld r3, array1@got@pcrel(0), 1
+; CHECK-S-NEXT:  .Lpcrel0:
+; CHECK-S-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-S-NEXT:    lwa r3, -4(r3)
 ; CHECK-S-NEXT:    blr
 ; CHECK-O-LABEL: <getElementExternNegative>:
 ; CHECK-O:         pld 3, 0(0), 1
 ; CHECK-O-NEXT:      R_PPC64_GOT_PCREL34  array1
-; CHECK-O-NEXT:    lwa 3, -4(3)
+; CHECK-O-NEXT:      R_PPC64_PCREL_OPT *ABS*+0x8
+; CHECK-O:         lwa 3, -4(3)
 ; CHECK-O-NEXT:    blr
 entry:
   %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 -1), align 4

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
index 51eb7a3fbbaa..56e49780c5f0 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -51,6 +51,8 @@ define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
 ; CHECK:         .localentry TailCallExtrnFuncPtr, 1
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, Func@got@pcrel(0), 1
+; CHECK-NEXT:  .Lpcrel:
+; CHECK-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
 ; CHECK-NEXT:    ld r12, 0(r3)
 ; CHECK-NEXT:    mtctr r12
 ; CHECK-NEXT:    bctr

diff  --git a/llvm/test/CodeGen/PowerPC/pcrel.ll b/llvm/test/CodeGen/PowerPC/pcrel.ll
index e9ebc6b2dd23..55783180cfac 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -41,12 +41,15 @@ define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr  {
 ; CHECK-S-LABEL: ReadGlobalVarInt
 ; CHECK-S:       # %bb.0: # %entry
 ; CHECK-S-NEXT:    pld r3, valIntGlob@got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel:
+; CHECK-S-NEXT:    .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
 ; CHECK-S-NEXT:    lwa r3, 0(r3)
 ; CHECK-S-NEXT:    blr
 
 ; CHECK-O-LABEL: ReadGlobalVarInt
 ; CHECK-O:         pld 3, 0(0), 1
 ; CHECK-O-NEXT:    R_PPC64_GOT_PCREL34 valIntGlob
+; CHECK-O-NEXT:    R_PPC64_PCREL_OPT *ABS*+0x8
 ; CHECK-O-NEXT:    lwa 3, 0(3)
 ; CHECK-O-NEXT:    blr
 entry:


        


More information about the llvm-commits mailing list