[llvm] a60251d - [PowerPC] Add linker opt for PC Relative GOT indirect accesses
Stefan Pintilie via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 22 07:14:20 PDT 2020
Author: Stefan Pintilie
Date: 2020-07-22T09:08:23-05:00
New Revision: a60251d739b97d506bbb9ea71d5f8ac9a3c155ce
URL: https://github.com/llvm/llvm-project/commit/a60251d739b97d506bbb9ea71d5f8ac9a3c155ce
DIFF: https://github.com/llvm/llvm-project/commit/a60251d739b97d506bbb9ea71d5f8ac9a3c155ce.diff
LOG: [PowerPC] Add linker opt for PC Relative GOT indirect accesses
A linker optimization is available on PowerPC for GOT indirect PCRelative loads.
The idea is that we can mark a usual GOT indirect load:
pld 3, vec@got@pcrel(0), 1
lwa 3, 4(3)
With a relocation to say that if we don't need to go through the GOT we can let
the linker further optimize this and replace a load with a nop.
pld 3, vec@got@pcrel(0), 1
.Lpcrel1:
.reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
lwa 3, 4(3)
This patch adds the logic that allows the compiler to add the R_PPC64_PCREL_OPT.
Reviewers: nemanjai, lei, hfinkel, sfertile, efriedma, tstellar, grosbach
Reviewed By: nemanjai
Differential Revision: https://reviews.llvm.org/D79864
Added:
llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
Modified:
llvm/include/llvm/MC/MCExpr.h
llvm/lib/MC/MCExpr.cpp
llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
llvm/lib/Target/PowerPC/PPC.h
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
llvm/test/CodeGen/PowerPC/pcrel.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 803c0d443bee..a424957bb0b2 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -302,6 +302,7 @@ class MCSymbolRefExpr : public MCExpr {
VK_PPC_TLSLD, // symbol@tlsld
VK_PPC_LOCAL, // symbol@local
VK_PPC_NOTOC, // symbol@notoc
+ VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr
VK_COFF_IMGREL32, // symbol@imgrel (image-relative)
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 9b78eca35caa..11fd32e48a4a 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -324,6 +324,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_TLSLD: return "tlsld";
case VK_PPC_LOCAL: return "local";
case VK_PPC_NOTOC: return "notoc";
+ case VK_PPC_PCREL_OPT: return "<<invalid>>";
case VK_COFF_IMGREL32: return "IMGREL";
case VK_Hexagon_LO16: return "LO16";
case VK_Hexagon_HI16: return "HI16";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 4373778cc96c..386d59266096 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -20,6 +20,7 @@
#include "PPCELFStreamer.h"
+#include "PPCFixupKinds.h"
#include "PPCInstrInfo.h"
#include "PPCMCCodeEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -89,12 +90,33 @@ void PPCELFStreamer::emitInstruction(const MCInst &Inst,
PPCMCCodeEmitter *Emitter =
static_cast<PPCMCCodeEmitter*>(getAssembler().getEmitterPtr());
+ // If the instruction is a part of the GOT to PC-Rel link time optimization
+ // instruction pair, return a value, otherwise return None. A true returned
+ // value means the instruction is the PLDpc and a false value means it is
+ // the user instruction.
+ Optional<bool> IsPartOfGOTToPCRelPair = isPartOfGOTToPCRelPair(Inst, STI);
+
+ // User of the GOT-indirect address.
+ // For example, the load that will get the relocation as follows:
+ // .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+ // lwa 3, 4(3)
+ if (IsPartOfGOTToPCRelPair.hasValue() && !IsPartOfGOTToPCRelPair.getValue())
+ emitGOTToPCRelReloc(Inst);
+
// Special handling is only for prefixed instructions.
if (!Emitter->isPrefixedInstruction(Inst)) {
MCELFStreamer::emitInstruction(Inst, STI);
return;
}
emitPrefixedInstruction(Inst, STI);
+
+ // Producer of the GOT-indirect address.
+ // For example, the prefixed load from the got that will get the label as
+ // follows:
+ // pld 3, vec@got@pcrel(0), 1
+ // .Lpcrel1:
+ if (IsPartOfGOTToPCRelPair.hasValue() && IsPartOfGOTToPCRelPair.getValue())
+ emitGOTToPCRelLabel(Inst);
}
void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
@@ -103,6 +125,102 @@ void PPCELFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCELFStreamer::emitLabel(Symbol);
}
+// Attach the link time GOT PC Relative optimization relocation to the PLDpc
+// that is paired with Inst. In assembly form the result looks like this:
+//   pld <reg> symbol@got@pcrel
+// <Label###>:
+//   .reloc Label###-8,R_PPC64_PCREL_OPT,.-(Label###-8)
+//   load <loadedreg>, 0(<reg>)
+// The label is placed after the PLDpc rather than before it because an
+// alignment nop may be emitted ahead of the PLDpc: prefixed instructions must
+// not cross a 64-byte boundary (please see
+// PPCELFStreamer::emitPrefixedInstruction()). Subtracting the width of a
+// prefixed instruction (8 bytes) from the label therefore always lands on the
+// PLDpc itself.
+//
+// Inst is the second instruction of the pair; its last operand must be an
+// expression of kind VK_PPC_PCREL_OPT naming the label emitted after the
+// PLDpc.
+void PPCELFStreamer::emitGOTToPCRelReloc(const MCInst &Inst) {
+  // The pairing label travels as the trailing operand of the instruction.
+  const MCOperand &LastOperand = Inst.getOperand(Inst.getNumOperands() - 1);
+  assert(LastOperand.isExpr() && "Expecting an MCExpr.");
+  const MCSymbolRefExpr *PairRef =
+      static_cast<const MCSymbolRefExpr *>(LastOperand.getExpr());
+  assert(PairRef->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+         "Expecting a symbol of type VK_PPC_PCREL_OPT");
+
+  // Look up the label that follows the PLDpc.
+  MCSymbol *PLDLabel =
+      getContext().getOrCreateSymbol(PairRef->getSymbol().getName());
+
+  // PLDAddr is Label###-8, i.e. the address of the PLDpc.
+  const MCExpr *PLDLabelRef = MCSymbolRefExpr::create(PLDLabel, getContext());
+  const MCExpr *PrefixedWidth = MCConstantExpr::create(8, getContext());
+  const MCExpr *PLDAddr =
+      MCBinaryExpr::createSub(PLDLabelRef, PrefixedWidth, getContext());
+
+  // Distance is .-(Label###-8), measured from a fresh temporary symbol that
+  // is bound to the current location at the end of this function.
+  MCSymbol *Here = getContext().createTempSymbol();
+  const MCExpr *HereRef = MCSymbolRefExpr::create(Here, getContext());
+  const MCExpr *Distance =
+      MCBinaryExpr::createSub(HereRef, PLDAddr, getContext());
+
+  // Record the relocation as a literal-relocation fixup in the fragment that
+  // holds the label, at the offset of the PLDpc (label offset minus 8).
+  MCDataFragment *LabelFragment =
+      static_cast<MCDataFragment *>(PLDLabel->getFragment());
+  assert(LabelFragment && "Expecting a valid data fragment.");
+  MCFixupKind RelocKind = static_cast<MCFixupKind>(
+      FirstLiteralRelocationKind + ELF::R_PPC64_PCREL_OPT);
+  LabelFragment->getFixups().push_back(MCFixup::create(
+      PLDLabel->getOffset() - 8, Distance, RelocKind, Inst.getLoc()));
+
+  emitLabel(Here, Inst.getLoc());
+}
+
+// Emit the label that immediately follows the PLDpc of a link time GOT PC Rel
+// optimization pair. The label name is taken from the last operand of Inst,
+// which must be an expression of kind VK_PPC_PCREL_OPT.
+void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) {
+  // The pairing label travels as the trailing operand of the instruction.
+  const MCOperand &LastOperand = Inst.getOperand(Inst.getNumOperands() - 1);
+  assert(LastOperand.isExpr() && "Expecting an MCExpr.");
+  const MCSymbolRefExpr *PairRef =
+      static_cast<const MCSymbolRefExpr *>(LastOperand.getExpr());
+  assert(PairRef->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT &&
+         "Expecting a symbol of type VK_PPC_PCREL_OPT");
+
+  // Bind the symbol of that name to the current location.
+  MCSymbol *PLDLabel =
+      getContext().getOrCreateSymbol(PairRef->getSymbol().getName());
+  emitLabel(PLDLabel, Inst.getLoc());
+}
+
+// This function checks if the parameter Inst is part of the setup for a link
+// time GOT PC Relative optimization. For example in this situation:
+// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)>
+//   <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// <MCInst SOME_LOAD <MCOperand Reg:22> <MCOperand Imm:0> <MCOperand Reg:282>
+//   <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
+// The above is a pair of such instructions and this function will not return
+// None for either one of them. In both cases we are looking for the last
+// operand <MCOperand Expr:(.Lpcrel@<<invalid>>)> which needs to be an
+// MCSymbolRefExpr with the variant kind MCSymbolRefExpr::VK_PPC_PCREL_OPT.
+// After that we just look at the opcode.
+//
+// Returns:
+//   None  - Inst is not part of such a pair.
+//   true  - Inst is the PLDpc (first instruction of the pair).
+//   false - Inst is the load or store (second instruction of the pair).
+//
+// STI is not consulted by the current implementation.
+Optional<bool> llvm::isPartOfGOTToPCRelPair(const MCInst &Inst,
+                                            const MCSubtargetInfo &STI) {
+  // Need at least two operands.
+  if (Inst.getNumOperands() < 2)
+    return None;
+
+  // The last operand needs to be an MCExpr. If it is not, this is not a link
+  // time GOT PC Rel opt instruction and we can ignore it.
+  const MCOperand &Operand = Inst.getOperand(Inst.getNumOperands() - 1);
+  if (!Operand.isExpr())
+    return None;
+
+  // Only a symbol reference carries a variant kind. Any instruction whose
+  // last operand is some other expression (binary, constant, target
+  // specific, ...) reaches this point as well, so the expression kind has to
+  // be verified before downcasting; a bare static_cast would read the
+  // variant kind through a pointer of the wrong dynamic type.
+  const MCExpr *Expr = Operand.getExpr();
+  if (!Expr || Expr->getKind() != MCExpr::SymbolRef)
+    return None;
+
+  // Check for the variant kind VK_PPC_PCREL_OPT in this expression.
+  const MCSymbolRefExpr *SymExpr = static_cast<const MCSymbolRefExpr *>(Expr);
+  if (SymExpr->getKind() != MCSymbolRefExpr::VK_PPC_PCREL_OPT)
+    return None;
+
+  return (Inst.getOpcode() == PPC::PLDpc);
+}
+
MCELFStreamer *llvm::createPPCELFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index 51863232d071..f44200104f32 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -43,8 +43,15 @@ class PPCELFStreamer : public MCELFStreamer {
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
private:
void emitPrefixedInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+ void emitGOTToPCRelReloc(const MCInst &Inst);
+ void emitGOTToPCRelLabel(const MCInst &Inst);
};
+// Check if the instruction Inst is part of a pair of instructions that make up
+// a link time GOT PC Rel optimization.
+Optional<bool> isPartOfGOTToPCRelPair(const MCInst &Inst,
+ const MCSubtargetInfo &STI);
+
MCELFStreamer *createPPCELFStreamer(MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> OW,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 16da62a74b8c..222bf2fa8283 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -92,6 +92,36 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
+ // Check if the last operand is an expression with the variant kind
+ // VK_PPC_PCREL_OPT. If this is the case then this is a linker optimization
+ // relocation and the .reloc directive needs to be added.
+ unsigned LastOp = MI->getNumOperands() - 1;
+ if (MI->getNumOperands() > 1) {
+ const MCOperand &Operand = MI->getOperand(LastOp);
+ if (Operand.isExpr()) {
+ const MCExpr *Expr = Operand.getExpr();
+ const MCSymbolRefExpr *SymExpr =
+ static_cast<const MCSymbolRefExpr *>(Expr);
+
+ if (SymExpr && SymExpr->getKind() == MCSymbolRefExpr::VK_PPC_PCREL_OPT) {
+ const MCSymbol &Symbol = SymExpr->getSymbol();
+ if (MI->getOpcode() == PPC::PLDpc) {
+ printInstruction(MI, Address, O);
+ O << "\n";
+ Symbol.print(O, &MAI);
+ O << ":";
+ return;
+ } else {
+ O << "\t.reloc ";
+ Symbol.print(O, &MAI);
+ O << "-8,R_PPC64_PCREL_OPT,.-(";
+ Symbol.print(O, &MAI);
+ O << "-8)\n";
+ }
+ }
+ }
+ }
+
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 7e0aa2c6061d..3106290442af 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -107,6 +107,10 @@ namespace llvm {
/// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL.
MO_GOT_FLAG = 8,
+ // MO_PCREL_OPT_FLAG - If this bit is set the operand is part of a
+ // PC Relative linker optimization.
+ MO_PCREL_OPT_FLAG = 16,
+
/// The next are not flags but distinct values.
MO_ACCESS_MASK = 0xf00,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6b822a803132..99e25bb130ce 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2283,7 +2283,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_PLT, "ppc-plt"},
{MO_PIC_FLAG, "ppc-pic"},
{MO_PCREL_FLAG, "ppc-pcrel"},
- {MO_GOT_FLAG, "ppc-got"}};
+ {MO_GOT_FLAG, "ppc-got"},
+ {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"}};
return makeArrayRef(TargetFlags);
}
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 236f98f32e18..1f51a55b1683 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -100,6 +100,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MIOpcode == PPC::BL8_NOTOC) {
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
}
+ if (MO.getTargetFlags() == PPCII::MO_PCREL_OPT_FLAG)
+ RefKind = MCSymbolRefExpr::VK_PPC_PCREL_OPT;
}
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
diff --git a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
index 4ea714ff15f7..65539166e5ef 100644
--- a/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -43,6 +43,46 @@ RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {
+
+// Returns true when the opcode of Use is one of the load/store opcodes that
+// this pass accepts as the second instruction of a GOT indirect access pair;
+// returns false for every other opcode.
+static bool hasPCRelativeForm(MachineInstr &Use) {
+  switch (Use.getOpcode()) {
+  // Byte, halfword and word accesses (32-bit and 64-bit register variants).
+  case PPC::LBZ:
+  case PPC::LBZ8:
+  case PPC::LHA:
+  case PPC::LHA8:
+  case PPC::LHZ:
+  case PPC::LHZ8:
+  case PPC::LWZ:
+  case PPC::LWZ8:
+  case PPC::STB:
+  case PPC::STB8:
+  case PPC::STH:
+  case PPC::STH8:
+  case PPC::STW:
+  case PPC::STW8:
+  // Doubleword accesses and load word algebraic.
+  case PPC::LD:
+  case PPC::STD:
+  case PPC::LWA:
+  // LX*/STX* accesses.
+  case PPC::LXSD:
+  case PPC::LXSSP:
+  case PPC::LXV:
+  case PPC::STXSD:
+  case PPC::STXSSP:
+  case PPC::STXV:
+  // LF*/STF* accesses.
+  case PPC::LFD:
+  case PPC::LFS:
+  case PPC::STFD:
+  case PPC::STFS:
+  // DFLOAD*/DFSTORE* opcodes.
+  case PPC::DFLOADf32:
+  case PPC::DFLOADf64:
+  case PPC::DFSTOREf32:
+  case PPC::DFSTOREf64:
+    return true;
+  default:
+    return false;
+  }
+}
+
class PPCPreEmitPeephole : public MachineFunctionPass {
public:
static char ID;
@@ -172,6 +212,135 @@ namespace {
return !InstrsToErase.empty();
}
+ // Returns true when Instr is a PLDpc whose first operand is a register and
+ // whose second operand is a global symbol carrying the MO_GOT_FLAG target
+ // flag, i.e. the address-producing half of a GOT indirect access.
+ bool isGOTPLDpc(MachineInstr &Instr) {
+   const bool IsPLDpc = Instr.getOpcode() == PPC::PLDpc;
+   if (!IsPLDpc)
+     return false;
+
+   // Operand 0 is the result and has to be a register.
+   if (!Instr.getOperand(0).isReg())
+     return false;
+
+   // Operand 1 has to name a global symbol.
+   const MachineOperand &GlobalOp = Instr.getOperand(1);
+   if (!GlobalOp.isGlobal())
+     return false;
+
+   // Only accesses that go through the GOT qualify.
+   return (GlobalOp.getTargetFlags() & PPCII::MO_GOT_FLAG) != 0;
+ }
+
+ // Find pairs made of a GOT indirect PLDpc and the single load/store that
+ // consumes (and kills) the loaded address within MBB, and tag both
+ // instructions with a shared temporary symbol flagged MO_PCREL_OPT_FLAG.
+ //
+ // MBB is the basic block to scan and TRI is used for the register
+ // read/modify queries. Returns true if at least one pair was tagged.
+ bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
+   MachineFunction *MF = MBB.getParent();
+   // Add this linker opt only if we are using PC Relative memops.
+   if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
+     return false;
+
+   // Struct to keep track of one def/use pair for a GOT indirect access.
+   struct GOTDefUsePair {
+     MachineBasicBlock::iterator DefInst;
+     MachineBasicBlock::iterator UseInst;
+     Register DefReg;
+     Register UseReg;
+     bool StillValid;
+   };
+   // Vector of def/use pairs in this basic block.
+   SmallVector<GOTDefUsePair, 4> CandPairs;
+   SmallVector<GOTDefUsePair, 4> ValidPairs;
+   bool MadeChange = false;
+
+   // Run through all of the instructions in the basic block and try to
+   // collect potential pairs of GOT indirect access instructions.
+   for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
+     // Run through the candidate pairs and see if any of the registers
+     // defined in the PLD instructions are used by this instruction. This is
+     // done for every instruction, including a new PLDpc, so that a PLDpc
+     // that redefines the register of an older candidate retires it.
+     // Note: the size of CandPairs can change in the loop, so the index is
+     // only advanced when the current element is kept. Advancing after an
+     // erase would skip the element that moved into the erased slot and
+     // leave a candidate alive whose register has another user.
+     for (unsigned Idx = 0; Idx < CandPairs.size();) {
+       GOTDefUsePair &Pair = CandPairs[Idx];
+       // The instruction does not use or modify this PLD's def reg,
+       // ignore it.
+       if (!BBI->readsRegister(Pair.DefReg, TRI) &&
+           !BBI->modifiesRegister(Pair.DefReg, TRI)) {
+         ++Idx;
+         continue;
+       }
+
+       // The use needs to be used in the address computation and not
+       // as the register being stored for a store.
+       const MachineOperand *UseOp =
+           hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
+
+       // Check for a valid use.
+       if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
+           UseOp->isUse() && UseOp->isKill()) {
+         Pair.UseInst = BBI;
+         Pair.UseReg = BBI->getOperand(0).getReg();
+         ValidPairs.push_back(Pair);
+       }
+       // Whether it matched or not, the first instruction that touches the
+       // def reg ends this candidate. The element now at Idx is the next
+       // one to examine.
+       CandPairs.erase(CandPairs.begin() + Idx);
+     }
+
+     // Look for the initial GOT indirect load and start a new candidate.
+     if (isGOTPLDpc(*BBI)) {
+       GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
+                                 BBI->getOperand(0).getReg(),
+                                 PPC::NoRegister, true};
+       CandPairs.push_back(CurrentPair);
+     }
+   }
+
+   // Go through all of the pairs and check for any more valid uses.
+   for (GOTDefUsePair &Pair : ValidPairs) {
+     // We shouldn't be here if we don't have a valid pair.
+     assert(Pair.UseInst.isValid() && Pair.StillValid &&
+            "Kept an invalid def/use pair for GOT PCRel opt");
+     // We have found a potential pair. Search through the instructions
+     // between the def and the use to see if it is valid to mark this as a
+     // linker opt.
+     MachineBasicBlock::iterator BBI = Pair.DefInst;
+     ++BBI;
+     for (; BBI != Pair.UseInst; ++BBI) {
+       if (BBI->readsRegister(Pair.UseReg, TRI) ||
+           BBI->modifiesRegister(Pair.UseReg, TRI)) {
+         Pair.StillValid = false;
+         break;
+       }
+     }
+
+     if (!Pair.StillValid)
+       continue;
+
+     // The load/store instruction that uses the address from the PLD will
+     // either use a register (for a store) or define a register (for the
+     // load). That register will be added as an implicit def to the PLD
+     // and as an implicit use on the second memory op. This is a precaution
+     // to prevent future passes from using that register between the two
+     // instructions.
+     MachineOperand ImplDef =
+         MachineOperand::CreateReg(Pair.UseReg, true, true);
+     MachineOperand ImplUse =
+         MachineOperand::CreateReg(Pair.UseReg, false, true);
+     Pair.DefInst->addOperand(ImplDef);
+     Pair.UseInst->addOperand(ImplUse);
+
+     // Create the symbol and append it to both instructions of the pair.
+     MCContext &Context = MF->getContext();
+     MCSymbol *Symbol =
+         Context.createTempSymbol(Twine("pcrel"), false, false);
+     MachineOperand PCRelLabel =
+         MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
+     Pair.DefInst->addOperand(*MF, PCRelLabel);
+     Pair.UseInst->addOperand(*MF, PCRelLabel);
+     MadeChange = true;
+   }
+   return MadeChange;
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
@@ -192,6 +361,7 @@ namespace {
SmallVector<MachineInstr *, 4> InstrsToErase;
for (MachineBasicBlock &MBB : MF) {
Changed |= removeRedundantLIs(MBB, TRI);
+ Changed |= addLinkerOpt(MBB, TRI);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == PPC::UNENCODED_NOP) {
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
index 1bad2e726341..0a4f2f38c816 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll
@@ -67,6 +67,8 @@ define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) l
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall at notoc
; CHECK-S-NEXT: pld r4, externGlobalVar at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel:
+; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
@@ -150,6 +152,8 @@ define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b)
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall at notoc
; CHECK-S-NEXT: pld r4, externGlobalVar at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel0:
+; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
@@ -212,6 +216,8 @@ define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr
; CHECK-S: .localentry TailCallLocal2
; CHECK-S: # %bb.0: # %entry
; CHECK-S: pld r4, externGlobalVar at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel1:
+; CHECK-S-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
@@ -254,6 +260,8 @@ define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr
; CHECK-S: .localentry TailCallExtern2
; CHECK-S: # %bb.0: # %entry
; CHECK-S: pld r4, externGlobalVar at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel2:
+; CHECK-S-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
@@ -319,6 +327,8 @@ define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca
; CHECK-S-NEXT: mtctr r12
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: pld r4, externGlobalVar at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel3:
+; CHECK-S-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: mullw r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
index 625d91d2eb5b..4d61b66d3bb7 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll
@@ -23,12 +23,16 @@ define dso_local signext i32 @ReadGlobalVarChar() local_unnamed_addr {
; LE-LABEL: ReadGlobalVarChar:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, valChar at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel:
+; LE-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; LE-NEXT: lbz r3, 0(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalVarChar:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, valChar at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel:
+; BE-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; BE-NEXT: lbz r3, 0(r3)
; BE-NEXT: blr
entry:
@@ -60,12 +64,16 @@ define dso_local signext i32 @ReadGlobalVarShort() local_unnamed_addr {
; LE-LABEL: ReadGlobalVarShort:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, valShort at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel0:
+; LE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; LE-NEXT: lha r3, 0(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalVarShort:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, valShort at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel0:
+; BE-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; BE-NEXT: lha r3, 0(r3)
; BE-NEXT: blr
entry:
@@ -97,12 +105,16 @@ define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr {
; LE-LABEL: ReadGlobalVarInt:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, valInt at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel1:
+; LE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
; LE-NEXT: lwa r3, 0(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalVarInt:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, valInt at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel1:
+; BE-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
; BE-NEXT: lwa r3, 0(r3)
; BE-NEXT: blr
entry:
@@ -133,12 +145,16 @@ define dso_local signext i32 @ReadGlobalVarUnsigned() local_unnamed_addr {
; LE-LABEL: ReadGlobalVarUnsigned:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, valUnsigned at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel2:
+; LE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
; LE-NEXT: lwa r3, 0(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalVarUnsigned:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, valUnsigned at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel2:
+; BE-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
; BE-NEXT: lwa r3, 0(r3)
; BE-NEXT: blr
entry:
@@ -169,12 +185,16 @@ define dso_local signext i32 @ReadGlobalVarLong() local_unnamed_addr {
; LE-LABEL: ReadGlobalVarLong:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, valLong at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel3:
+; LE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
; LE-NEXT: lwa r3, 0(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalVarLong:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, valLong at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel3:
+; BE-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
; BE-NEXT: lwa r3, 4(r3)
; BE-NEXT: blr
entry:
@@ -206,12 +226,16 @@ define dso_local i32* @ReadGlobalPtr() local_unnamed_addr {
; LE-LABEL: ReadGlobalPtr:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, ptr at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel4:
+; LE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
; LE-NEXT: ld r3, 0(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalPtr:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, ptr at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel4:
+; BE-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
; BE-NEXT: ld r3, 0(r3)
; BE-NEXT: blr
entry:
@@ -223,7 +247,9 @@ define dso_local void @WriteGlobalPtr() local_unnamed_addr {
; LE-LABEL: WriteGlobalPtr:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, ptr at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel5:
; LE-NEXT: li r4, 3
+; LE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
; LE-NEXT: ld r3, 0(r3)
; LE-NEXT: stw r4, 0(r3)
; LE-NEXT: blr
@@ -231,7 +257,9 @@ define dso_local void @WriteGlobalPtr() local_unnamed_addr {
; BE-LABEL: WriteGlobalPtr:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, ptr at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel5:
; BE-NEXT: li r4, 3
+; BE-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
; BE-NEXT: ld r3, 0(r3)
; BE-NEXT: stw r4, 0(r3)
; BE-NEXT: blr
@@ -259,12 +287,16 @@ define dso_local signext i32 @ReadGlobalArray() local_unnamed_addr {
; LE-LABEL: ReadGlobalArray:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, array at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel6:
+; LE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
; LE-NEXT: lwa r3, 12(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalArray:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, array at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel6:
+; BE-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
; BE-NEXT: lwa r3, 12(r3)
; BE-NEXT: blr
entry:
@@ -295,12 +327,16 @@ define dso_local signext i32 @ReadGlobalStruct() local_unnamed_addr {
; LE-LABEL: ReadGlobalStruct:
; LE: # %bb.0: # %entry
; LE-NEXT: pld r3, structure at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel7:
+; LE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
; LE-NEXT: lwa r3, 4(r3)
; LE-NEXT: blr
;
; BE-LABEL: ReadGlobalStruct:
; BE: # %bb.0: # %entry
; BE-NEXT: pld r3, structure at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel7:
+; BE-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
; BE-NEXT: lwa r3, 4(r3)
; BE-NEXT: blr
entry:
@@ -332,6 +368,8 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr {
; LE: .localentry ReadFuncPtr, 1
; LE-NEXT: # %bb.0: # %entry
; LE-NEXT: pld r3, ptrfunc at got@pcrel(0), 1
+; LE-NEXT: .Lpcrel8:
+; LE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
; LE-NEXT: ld r12, 0(r3)
; LE-NEXT: mtctr r12
; LE-NEXT: bctr
@@ -341,6 +379,8 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr {
; BE: .localentry ReadFuncPtr, 1
; BE-NEXT: # %bb.0: # %entry
; BE-NEXT: pld r3, ptrfunc at got@pcrel(0), 1
+; BE-NEXT: .Lpcrel8:
+; BE-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
; BE-NEXT: ld r12, 0(r3)
; BE-NEXT: mtctr r12
; BE-NEXT: bctr
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
new file mode 100644
index 000000000000..e878e7439911
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -0,0 +1,395 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: < %s | FileCheck %s
+
+; On future CPU with PC Relative addressing enabled, it is possible for the
+; linker to optimize GOT indirect accesses. In order for the linker to do this
+; the compiler needs to add a hint using the R_PPC64_PCREL_OPT relocation.
+; This test checks that the compiler adds the R_PPC64_PCREL_OPT relocation
+; correctly.
+
+@input8 = external local_unnamed_addr global i8, align 1
+@output8 = external local_unnamed_addr global i8, align 1
+@input16 = external local_unnamed_addr global i16, align 2
+@output16 = external local_unnamed_addr global i16, align 2
+@input32 = external global i32, align 4
+@output32 = external local_unnamed_addr global i32, align 4
+@input64 = external local_unnamed_addr global i64, align 8
+@output64 = external local_unnamed_addr global i64, align 8
+@input128 = external local_unnamed_addr global i128, align 16
+@output128 = external local_unnamed_addr global i128, align 16
+@inputf32 = external local_unnamed_addr global float, align 4
+@outputf32 = external local_unnamed_addr global float, align 4
+@inputf64 = external local_unnamed_addr global double, align 8
+@outputf64 = external local_unnamed_addr global double, align 8
+@inputVi32 = external local_unnamed_addr global <4 x i32>, align 16
+@outputVi32 = external local_unnamed_addr global <4 x i32>, align 16
+@inputVi64 = external local_unnamed_addr global <2 x i64>, align 16
+@outputVi64 = external local_unnamed_addr global <2 x i64>, align 16
+@ArrayIn = external global [10 x i32], align 4
+@ArrayOut = external local_unnamed_addr global [10 x i32], align 4
+@IntPtrIn = external local_unnamed_addr global i32*, align 8
+@IntPtrOut = external local_unnamed_addr global i32*, align 8
+@FuncPtrIn = external local_unnamed_addr global void (...)*, align 8
+@FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
+
+define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, input8 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel:
+; CHECK-NEXT: pld r4, output8 at got@pcrel(0), 1
+; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
+; CHECK-NEXT: lbz r3, 0(r3)
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which appears between the pld and the stb.
+; CHECK-NEXT: stb r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i8, i8* @input8, align 1
+ store i8 %0, i8* @output8, align 1
+ ret void
+}
+
+define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, input16 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel0:
+; CHECK-NEXT: pld r4, output16 at got@pcrel(0), 1
+; CHECK-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
+; CHECK-NEXT: lhz r3, 0(r3)
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which appears between the pld and the sth.
+; CHECK-NEXT: sth r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i16, i16* @input16, align 2
+ store i16 %0, i16* @output16, align 2
+ ret void
+}
+
+define dso_local void @ReadWrite32() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, input32 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel1:
+; CHECK-NEXT: pld r4, output32 at got@pcrel(0), 1
+; CHECK-NEXT: .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+; CHECK-NEXT: lwz r3, 0(r3)
+; CHECK-NEXT: stw r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i32, i32* @input32, align 4
+ store i32 %0, i32* @output32, align 4
+ ret void
+}
+
+define dso_local void @ReadWrite64() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, input64 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel2:
+; CHECK-NEXT: pld r4, output64 at got@pcrel(0), 1
+; CHECK-NEXT: .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+; CHECK-NEXT: ld r3, 0(r3)
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i64, i64* @input64, align 8
+ store i64 %0, i64* @output64, align 8
+ ret void
+}
+
+; FIXME: we should always convert X-Form instructions that use
+; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
+define dso_local void @ReadWrite128() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWrite128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, input128 at got@pcrel(0), 1
+; CHECK-NEXT: lxvx vs0, 0, r3
+; CHECK-NEXT: pld r3, output128 at got@pcrel(0), 1
+; CHECK-NEXT: stxvx vs0, 0, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = load i128, i128* @input128, align 16
+ store i128 %0, i128* @output128, align 16
+ ret void
+}
+
+define dso_local void @ReadWritef32() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWritef32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, inputf32 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel3:
+; CHECK-NEXT: xxspltidp vs1, 1078103900
+; CHECK-NEXT: .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+; CHECK-NEXT: lfs f0, 0(r3)
+; CHECK-NEXT: pld r3, outputf32 at got@pcrel(0), 1
+; CHECK-NEXT: xsaddsp f0, f0, f1
+; CHECK-NEXT: stfs f0, 0(r3)
+; CHECK-NEXT: blr
+entry:
+ %0 = load float, float* @inputf32, align 4
+ %add = fadd float %0, 0x400851EB80000000
+ store float %add, float* @outputf32, align 4
+ ret void
+}
+
+define dso_local void @ReadWritef64() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWritef64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, inputf64 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel4:
+; CHECK-NEXT: plfd f1, .LCPI6_0 at PCREL(0), 1
+; CHECK-NEXT: .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+; CHECK-NEXT: lfd f0, 0(r3)
+; CHECK-NEXT: pld r3, outputf64 at got@pcrel(0), 1
+; CHECK-NEXT: xsadddp f0, f0, f1
+; CHECK-NEXT: stfd f0, 0(r3)
+; CHECK-NEXT: blr
+entry:
+ %0 = load double, double* @inputf64, align 8
+ %add = fadd double %0, 6.800000e+00
+ store double %add, double* @outputf64, align 8
+ ret void
+}
+
+; FIXME: we should always convert X-Form instructions that use
+; PPC::ZERO[8] to the corresponding D-Form so we can perform this opt.
+define dso_local void @ReadWriteVi32() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteVi32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, inputVi32 at got@pcrel(0), 1
+; CHECK-NEXT: li r4, 45
+; CHECK-NEXT: mtfprwz f1, r4
+; CHECK-NEXT: lxvx vs0, 0, r3
+; CHECK-NEXT: pld r3, outputVi32 at got@pcrel(0), 1
+; CHECK-NEXT: xxinsertw vs0, vs1, 8
+; CHECK-NEXT: stxvx vs0, 0, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+ %vecins = insertelement <4 x i32> %0, i32 45, i32 1
+ store <4 x i32> %vecins, <4 x i32>* @outputVi32, align 16
+ ret void
+}
+
+define dso_local void @ReadWriteVi64() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteVi64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, inputVi64 at got@pcrel(0), 1
+; CHECK-NEXT: lxvx vs0, 0, r3
+; CHECK-NEXT: pld r3, outputVi64 at got@pcrel(0), 1
+; CHECK-NEXT: stxvx vs0, 0, r3
+; CHECK-NEXT: blr
+entry:
+ %0 = load <2 x i64>, <2 x i64>* @inputVi64, align 16
+ store <2 x i64> %0, <2 x i64>* @outputVi64, align 16
+ ret void
+}
+
+define dso_local void @ReadWriteArray() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteArray:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, ArrayIn at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel5:
+; CHECK-NEXT: pld r4, ArrayOut at got@pcrel(0), 1
+; CHECK-NEXT: .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+; CHECK-NEXT: lwz r3, 28(r3)
+; CHECK-NEXT: addi r3, r3, 42
+; CHECK-NEXT: stw r3, 8(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 7), align 4
+ %add = add nsw i32 %0, 42
+ store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayOut, i64 0, i64 2), align 4
+ ret void
+}
+
+define dso_local void @ReadWriteSameArray() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteSameArray:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, ArrayIn at got@pcrel(0), 1
+; CHECK-NEXT: lwz r4, 12(r3)
+; CHECK-NEXT: addi r4, r4, 8
+; CHECK-NEXT: stw r4, 24(r3)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 3), align 4
+ %add = add nsw i32 %0, 8
+ store i32 %add, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 6), align 4
+ ret void
+}
+
+define dso_local void @ReadWriteIntPtr() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteIntPtr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, IntPtrIn at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel6:
+; CHECK-NEXT: pld r4, IntPtrOut at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel7:
+; CHECK-NEXT: .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+; CHECK-NEXT: ld r3, 0(r3)
+; CHECK-NEXT: .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+; CHECK-NEXT: ld r4, 0(r4)
+; CHECK-NEXT: lwz r5, 216(r3)
+; CHECK-NEXT: lwz r3, 48(r3)
+; CHECK-NEXT: add r3, r3, r5
+; CHECK-NEXT: stw r3, 136(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i32*, i32** @IntPtrIn, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %0, i64 54
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 12
+ %2 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %2, %1
+ %3 = load i32*, i32** @IntPtrOut, align 8
+ %arrayidx2 = getelementptr inbounds i32, i32* %3, i64 34
+ store i32 %add, i32* %arrayidx2, align 4
+ ret void
+}
+
+define dso_local void @ReadWriteFuncPtr() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadWriteFuncPtr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, FuncPtrIn at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel8:
+; CHECK-NEXT: pld r4, FuncPtrOut at got@pcrel(0), 1
+; CHECK-NEXT: .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+; CHECK-NEXT: ld r3, 0(r3)
+; CHECK-NEXT: std r3, 0(r4)
+; CHECK-NEXT: blr
+entry:
+ %0 = load i64, i64* bitcast (void (...)** @FuncPtrIn to i64*), align 8
+ store i64 %0, i64* bitcast (void (...)** @FuncPtrOut to i64*), align 8
+ ret void
+}
+
+define dso_local void @FuncPtrCopy() local_unnamed_addr #0 {
+; CHECK-LABEL: FuncPtrCopy:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, FuncPtrOut at got@pcrel(0), 1
+; CHECK-NEXT: pld r4, Callee at got@pcrel(0), 1
+; CHECK-NEXT: std r4, 0(r3)
+; CHECK-NEXT: blr
+entry:
+ store void (...)* @Callee, void (...)** @FuncPtrOut, align 8
+ ret void
+}
+
+declare void @Callee(...)
+
+define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
+; CHECK-LABEL: FuncPtrCall:
+; CHECK: .localentry FuncPtrCall, 1
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, FuncPtrIn at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel9:
+; CHECK-NEXT: .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+; CHECK-NEXT: ld r12, 0(r3)
+; CHECK-NEXT: mtctr r12
+; CHECK-NEXT: bctr
+; CHECK-NEXT: #TC_RETURNr8 ctr 0
+entry:
+ %0 = load void ()*, void ()** bitcast (void (...)** @FuncPtrIn to void ()**), align 8
+ tail call void %0()
+ ret void
+}
+
+define dso_local signext i32 @ReadVecElement() local_unnamed_addr #0 {
+; CHECK-LABEL: ReadVecElement:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, inputVi32 at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel10:
+; CHECK-NEXT: .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+; CHECK-NEXT: lwa r3, 4(r3)
+; CHECK-NEXT: blr
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+ %vecext = extractelement <4 x i32> %0, i32 1
+ ret i32 %vecext
+}
+
+define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
+; CHECK-LABEL: VecMultiUse:
+; CHECK: .localentry VecMultiUse, 1
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: pld r30, inputVi32 at got@pcrel(0), 1
+; CHECK-NEXT: lwz r29, 4(r30)
+; CHECK-NEXT: bl Callee at notoc
+; CHECK-NEXT: lwz r3, 8(r30)
+; CHECK-NEXT: add r29, r3, r29
+; CHECK-NEXT: bl Callee at notoc
+; CHECK-NEXT: lwz r3, 0(r30)
+; CHECK-NEXT: add r3, r29, r3
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: addi r1, r1, 64
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+ tail call void bitcast (void (...)* @Callee to void ()*)()
+ %1 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+ %2 = extractelement <4 x i32> %1, i32 2
+ %3 = extractelement <4 x i32> %0, i64 1
+ %4 = add nsw i32 %2, %3
+ tail call void bitcast (void (...)* @Callee to void ()*)()
+ %5 = load <4 x i32>, <4 x i32>* @inputVi32, align 16
+ %vecext2 = extractelement <4 x i32> %5, i32 0
+ %add3 = add nsw i32 %4, %vecext2
+ ret i32 %add3
+}
+
+define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
+; CHECK-LABEL: UseAddr:
+; CHECK: .localentry UseAddr, 1
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -48(r1)
+; CHECK-NEXT: pld r4, ArrayIn at got@pcrel(0), 1
+; CHECK-NEXT: lwz r5, 16(r4)
+; CHECK-NEXT: add r30, r5, r3
+; CHECK-NEXT: mr r3, r4
+; CHECK-NEXT: bl getAddr at notoc
+; CHECK-NEXT: add r3, r30, r3
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: addi r1, r1, 48
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+entry:
+ %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 4), align 4
+ %add = add nsw i32 %0, %a
+ %call = tail call signext i32 @getAddr(i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ArrayIn, i64 0, i64 0))
+ %add1 = add nsw i32 %add, %call
+ ret i32 %add1
+}
+
+declare signext i32 @getAddr(i32*) local_unnamed_addr
+
+define dso_local nonnull i32* @AddrTaken32() local_unnamed_addr #0 {
+; CHECK-LABEL: AddrTaken32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pld r3, input32 at got@pcrel(0), 1
+; CHECK-NEXT: blr
+entry:
+ ret i32* @input32
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
index 5c2eb0d5ec46..44d3f7a50e9b 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll
@@ -50,12 +50,15 @@ define dso_local signext i32 @getElementExtern4() local_unnamed_addr {
; CHECK-S-LABEL: getElementExtern4:
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: pld r3, array1 at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel:
+; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; CHECK-S-NEXT: lwa r3, 16(r3)
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: <getElementExtern4>:
; CHECK-O: pld 3, 0(0), 1
; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 array1
-; CHECK-O-NEXT: lwa 3, 16(3)
+; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8
+; CHECK-O: lwa 3, 16(3)
; CHECK-O-NEXT: blr
entry:
%0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 4), align 4
@@ -66,12 +69,15 @@ define dso_local signext i32 @getElementExternNegative() local_unnamed_addr {
; CHECK-S-LABEL: getElementExternNegative:
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: pld r3, array1 at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel0:
+; CHECK-S-NEXT: .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
; CHECK-S-NEXT: lwa r3, -4(r3)
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: <getElementExternNegative>:
; CHECK-O: pld 3, 0(0), 1
; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 array1
-; CHECK-O-NEXT: lwa 3, -4(3)
+; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8
+; CHECK-O: lwa 3, -4(3)
; CHECK-O-NEXT: blr
entry:
%0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 -1), align 4
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
index 51eb7a3fbbaa..56e49780c5f0 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll
@@ -51,6 +51,8 @@ define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
; CHECK: .localentry TailCallExtrnFuncPtr, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: pld r3, Func at got@pcrel(0), 1
+; CHECK-NEXT: .Lpcrel:
+; CHECK-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; CHECK-NEXT: ld r12, 0(r3)
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctr
diff --git a/llvm/test/CodeGen/PowerPC/pcrel.ll b/llvm/test/CodeGen/PowerPC/pcrel.ll
index e9ebc6b2dd23..55783180cfac 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -41,12 +41,15 @@ define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr {
; CHECK-S-LABEL: ReadGlobalVarInt
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: pld r3, valIntGlob at got@pcrel(0), 1
+; CHECK-S-NEXT: .Lpcrel:
+; CHECK-S-NEXT: .reloc .Lpcrel-8,R_PPC64_PCREL_OPT,.-(.Lpcrel-8)
; CHECK-S-NEXT: lwa r3, 0(r3)
; CHECK-S-NEXT: blr
; CHECK-O-LABEL: ReadGlobalVarInt
; CHECK-O: pld 3, 0(0), 1
; CHECK-O-NEXT: R_PPC64_GOT_PCREL34 valIntGlob
+; CHECK-O-NEXT: R_PPC64_PCREL_OPT *ABS*+0x8
; CHECK-O-NEXT: lwa 3, 0(3)
; CHECK-O-NEXT: blr
entry:
More information about the llvm-commits
mailing list