[lld] cddb0db - [LLD][PowerPC] Implement GOT to PC-Rel relaxation

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 17 07:36:48 PDT 2020


Author: Nemanja Ivanovic
Date: 2020-08-17T09:36:09-05:00
New Revision: cddb0dbcefd83109eee99a3bb1d8aa72629cf21d

URL: https://github.com/llvm/llvm-project/commit/cddb0dbcefd83109eee99a3bb1d8aa72629cf21d
DIFF: https://github.com/llvm/llvm-project/commit/cddb0dbcefd83109eee99a3bb1d8aa72629cf21d.diff

LOG: [LLD][PowerPC] Implement GOT to PC-Rel relaxation

This patch implements the handling for the R_PPC64_PCREL_OPT relocation as well
as the GOT relaxation for the associated R_PPC64_GOT_PCREL34 relocation.

On Power10 targets with PC-Relative addressing, the linker can relax
GOT-relative accesses to PC-Relative under some conditions. Since the sequence
consists of a prefixed load, followed by a non-prefixed access (load or store),
the linker needs to replace the first instruction (as the replacement
instruction will be prefixed). The compiler communicates to the linker that
this optimization is safe by placing the two aforementioned relocations on the
GOT load (of the address).
The linker then does two things:

- Convert the load from the got into a PC-Relative add to compute the address
  relative to the PC
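- Find the access instruction referred to by the second relocation
  (R_PPC64_PCREL_OPT), replace the first instruction with the PC-Relative
  (prefixed) version of that access instruction, and turn the original access
  instruction into a nop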

It is important to synchronize the mapping from legacy memory instructions to
their PC-Relative form. Hence, this patch adds a file to be included by both
the compiler and the linker so they're always in agreement.

Differential revision: https://reviews.llvm.org/D84360

Added: 
    lld/ELF/Arch/PPCInsns.def
    lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
    lld/test/ELF/ppc64-got-to-pcrel-relaxation.s

Modified: 
    lld/ELF/Arch/PPC64.cpp
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/InputSection.cpp
    lld/ELF/Options.td
    lld/ELF/Relocations.cpp
    lld/ELF/Relocations.h

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index c1ad72eaa2c9..cfb3ca9df406 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -62,6 +62,90 @@ enum DFormOpcd {
   ADDI = 14
 };
 
+enum class PPCLegacyInsn : uint32_t {
+  NOINSN = 0,
+  // Loads.
+  LBZ = 0x88000000,
+  LHZ = 0xa0000000,
+  LWZ = 0x80000000,
+  LHA = 0xa8000000,
+  LWA = 0xe8000002,
+  LD = 0xe8000000,
+  LFS = 0xC0000000,
+  LXSSP = 0xe4000003,
+  LFD = 0xc8000000,
+  LXSD = 0xe4000002,
+  LXV = 0xf4000001,
+  LXVP = 0x18000000,
+
+  // Stores.
+  STB = 0x98000000,
+  STH = 0xb0000000,
+  STW = 0x90000000,
+  STD = 0xf8000000,
+  STFS = 0xd0000000,
+  STXSSP = 0xf4000003,
+  STFD = 0xd8000000,
+  STXSD = 0xf4000002,
+  STXV = 0xf4000005,
+  STXVP = 0x18000001
+};
+enum class PPCPrefixedInsn : uint64_t {
+  NOINSN = 0,
+  PREFIX_MLS = 0x0610000000000000,
+  PREFIX_8LS = 0x0410000000000000,
+
+  // Loads.
+  PLBZ = PREFIX_MLS,
+  PLHZ = PREFIX_MLS,
+  PLWZ = PREFIX_MLS,
+  PLHA = PREFIX_MLS,
+  PLWA = PREFIX_8LS | 0xa4000000,
+  PLD = PREFIX_8LS | 0xe4000000,
+  PLFS = PREFIX_MLS,
+  PLXSSP = PREFIX_8LS | 0xac000000,
+  PLFD = PREFIX_MLS,
+  PLXSD = PREFIX_8LS | 0xa8000000,
+  PLXV = PREFIX_8LS | 0xc8000000,
+  PLXVP = PREFIX_8LS | 0xe8000000,
+
+  // Stores.
+  PSTB = PREFIX_MLS,
+  PSTH = PREFIX_MLS,
+  PSTW = PREFIX_MLS,
+  PSTD = PREFIX_8LS | 0xf4000000,
+  PSTFS = PREFIX_MLS,
+  PSTXSSP = PREFIX_8LS | 0xbc000000,
+  PSTFD = PREFIX_MLS,
+  PSTXSD = PREFIX_8LS | 0xb8000000,
+  PSTXV = PREFIX_8LS | 0xd8000000,
+  PSTXVP = PREFIX_8LS | 0xf8000000
+};
+static bool checkPPCLegacyInsn(uint32_t encoding) {
+  PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding);
+  if (insn == PPCLegacyInsn::NOINSN)
+    return false;
+#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
+  if (insn == PPCLegacyInsn::Legacy)                                           \
+    return true;
+#include "PPCInsns.def"
+#undef PCREL_OPT
+  return false;
+}
+
+// Masks to apply to legacy instructions when converting them to prefixed,
+// pc-relative versions. For the most part, the primary opcode is shared
+// between the legacy instruction and the suffix of its prefixed version.
+// However, there are some instances where that isn't the case (DS-Form and
+// DQ-form instructions).
+enum class LegacyToPrefixMask : uint64_t {
+  NOMASK = 0x0,
+  OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).
+  ONLY_RST = 0x3e00000,     // [RS]T (6-10).
+  ST_STX28_TO5 =
+      0x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.
+};
+
 uint64_t elf::getPPC64TocBase() {
   // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
   // TOC starts where the first of these sections starts. We always create a
@@ -333,6 +417,7 @@ static bool isDQFormInstruction(uint32_t encoding) {
   switch (getPrimaryOpCode(encoding)) {
   default:
     return false;
+  case 6: // Power10 paired loads/stores (lxvp, stxvp).
   case 56:
     // The only instruction with a primary opcode of 56 is `lq`.
     return true;
@@ -344,6 +429,78 @@ static bool isDQFormInstruction(uint32_t encoding) {
   }
 }
 
+static bool isDSFormInstruction(PPCLegacyInsn insn) {
+  switch (insn) {
+  default:
+    return false;
+  case PPCLegacyInsn::LWA:
+  case PPCLegacyInsn::LD:
+  case PPCLegacyInsn::LXSD:
+  case PPCLegacyInsn::LXSSP:
+  case PPCLegacyInsn::STD:
+  case PPCLegacyInsn::STXSD:
+  case PPCLegacyInsn::STXSSP:
+    return true;
+  }
+}
+
+static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {
+  uint32_t opc = encoding & 0xfc000000;
+
+  // If the primary opcode is shared between multiple instructions, we need to
+  // fix it up to match the actual instruction we are after.
+  if ((opc == 0xe4000000 || opc == 0xe8000000 || opc == 0xf4000000 ||
+       opc == 0xf8000000) &&
+      !isDQFormInstruction(encoding))
+    opc = encoding & 0xfc000003;
+  else if (opc == 0xf4000000)
+    opc = encoding & 0xfc000007;
+  else if (opc == 0x18000000)
+    opc = encoding & 0xfc00000f;
+
+  // If the value is not one of the enumerators in PPCLegacyInsn, we want to
+  // return PPCLegacyInsn::NOINSN.
+  if (!checkPPCLegacyInsn(opc))
+    return PPCLegacyInsn::NOINSN;
+  return static_cast<PPCLegacyInsn>(opc);
+}
+
+static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {
+  switch (insn) {
+#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
+  case PPCLegacyInsn::Legacy:                                                  \
+    return PPCPrefixedInsn::PCRel
+#include "PPCInsns.def"
+#undef PCREL_OPT
+  }
+  return PPCPrefixedInsn::NOINSN;
+}
+
+static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {
+  switch (insn) {
+#define PCREL_OPT(Legacy, PCRel, InsnMask)                                     \
+  case PPCLegacyInsn::Legacy:                                                  \
+    return LegacyToPrefixMask::InsnMask
+#include "PPCInsns.def"
+#undef PCREL_OPT
+  }
+  return LegacyToPrefixMask::NOMASK;
+}
+static uint64_t getPCRelativeForm(uint32_t encoding) {
+  PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);
+  PPCPrefixedInsn pcrelInsn = getPCRelativeForm(origInsn);
+  if (pcrelInsn == PPCPrefixedInsn::NOINSN)
+    return UINT64_C(-1);
+  LegacyToPrefixMask origInsnMask = getInsnMask(origInsn);
+  uint64_t pcrelEncoding =
+      (uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask);
+
+  // If the mask requires moving bit 28 to bit 5, do that now.
+  if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)
+    pcrelEncoding |= (encoding & 0x8) << 23;
+  return pcrelEncoding;
+}
+
 static bool isInstructionUpdateForm(uint32_t encoding) {
   switch (getPrimaryOpCode(encoding)) {
   default:
@@ -368,6 +525,25 @@ static bool isInstructionUpdateForm(uint32_t encoding) {
   }
 }
 
+// Compute the total displacement between the prefixed instruction that gets
+// to the start of the data and the load/store instruction that has the offset
+// into the data structure.
+// For example:
+// paddi 3, 0, 1000, 1
+// lwz 3, 20(3)
+// Should add up to 1020 for total displacement.
+static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {
+  int64_t disp34 = llvm::SignExtend64(
+      ((prefixedInsn & 0x3ffff00000000) >> 16) | (prefixedInsn & 0xffff), 34);
+  int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16);
+  // For DS and DQ form instructions, we need to mask out the XO bits.
+  if (isDQFormInstruction(accessInsn))
+    disp16 &= ~0xf;
+  else if (isDSFormInstruction(getPPCLegacyInsn(accessInsn)))
+    disp16 &= ~0x3;
+  return disp34 + disp16;
+}
+
 // There are a number of places when we either want to read or write an
 // instruction when handling a half16 relocation type. On big-endian the buffer
 // pointer is pointing into the middle of the word we want to extract, and on
@@ -475,6 +651,49 @@ void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const {
     relocateNoSym(loc, R_PPC64_TOC16_LO, val);
     break;
   }
+  case R_PPC64_GOT_PCREL34: {
+    // Clear the first 8 bits of the prefix and the first 6 bits of the
+    // instruction (the primary opcode).
+    uint64_t insn = readPrefixedInstruction(loc);
+    if ((insn & 0xfc000000) != 0xe4000000)
+      error("expected a 'pld' for got-indirect to pc-relative relaxing");
+    insn &= ~0xff000000fc000000;
+
+    // Replace the cleared bits with the values for PADDI (0x600000038000000).
+    insn |= 0x600000038000000;
+    writePrefixedInstruction(loc, insn);
+    relocate(loc, rel, val);
+    break;
+  }
+  case R_PPC64_PCREL_OPT: {
+    // We can only relax this if the R_PPC64_GOT_PCREL34 at this offset can
+    // be relaxed. The eligibility for the relaxation needs to be determined
+    // on that relocation since this one does not relocate a symbol.
+    uint64_t insn = readPrefixedInstruction(loc);
+    uint32_t accessInsn = read32(loc + rel.addend);
+    uint64_t pcRelInsn = getPCRelativeForm(accessInsn);
+
+    // This error is not necessary for correctness but is emitted for now
+    // to ensure we don't miss these opportunities in real code. It can be
+    // removed at a later date.
+    if (pcRelInsn == UINT64_C(-1)) {
+      errorOrWarn(
+          "unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +
+          Twine::utohexstr(accessInsn));
+      break;
+    }
+
+    int64_t totalDisp = getTotalDisp(insn, accessInsn);
+    if (!isInt<34>(totalDisp))
+      break; // Displacement doesn't fit.
+    // Convert the PADDI to the prefixed version of accessInsn and convert
+    // accessInsn to a nop.
+    writePrefixedInstruction(loc, pcRelInsn |
+                                      ((totalDisp & 0x3ffff0000) << 16) |
+                                      (totalDisp & 0xffff));
+    write32(loc + rel.addend, 0x60000000); // nop accessInsn.
+    break;
+  }
   default:
     llvm_unreachable("unexpected relocation type");
   }
@@ -668,6 +887,7 @@ RelExpr PPC64::getRelExpr(RelType type, const Symbol &s,
   case R_PPC64_TOC16_LO:
     return R_GOTREL;
   case R_PPC64_GOT_PCREL34:
+  case R_PPC64_PCREL_OPT:
     return R_GOT_PC;
   case R_PPC64_TOC16_HA:
   case R_PPC64_TOC16_LO_DS:
@@ -1024,6 +1244,9 @@ void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
                              (val & si1Mask));
     break;
   }
+  // A PCREL_OPT relocation that we won't optimize requires no further action.
+  case R_PPC64_PCREL_OPT:
+    break;
   default:
     llvm_unreachable("unknown relocation");
   }
@@ -1080,6 +1303,14 @@ bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
 
 RelExpr PPC64::adjustRelaxExpr(RelType type, const uint8_t *data,
                                RelExpr expr) const {
+  if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) &&
+      config->pcRelOptimize) {
+    // It only makes sense to optimize pld since paddi means that the address
+    // of the object in the GOT is required rather than the object itself.
+    assert(data && "Expecting an instruction encoding here");
+    if ((readPrefixedInstruction(data) & 0xfc000000) == 0xe4000000)
+      return R_PPC64_RELAX_GOT_PC;
+  }
   if (expr == R_RELAX_TLS_GD_TO_IE)
     return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
   if (expr == R_RELAX_TLS_LD_TO_LE)

diff  --git a/lld/ELF/Arch/PPCInsns.def b/lld/ELF/Arch/PPCInsns.def
new file mode 100644
index 000000000000..1baa8fd6c2ad
--- /dev/null
+++ b/lld/ELF/Arch/PPCInsns.def
@@ -0,0 +1,27 @@
+#ifndef PCREL_OPT
+#error "Need to define function-style macro PCREL_OPT"
+#endif
+PCREL_OPT(NOINSN, NOINSN, NOMASK);
+PCREL_OPT(LBZ, PLBZ, OPC_AND_RST);
+PCREL_OPT(LHZ, PLHZ, OPC_AND_RST);
+PCREL_OPT(LWZ, PLWZ, OPC_AND_RST);
+PCREL_OPT(LHA, PLHA, OPC_AND_RST);
+PCREL_OPT(LWA, PLWA, ONLY_RST);
+PCREL_OPT(LD, PLD , ONLY_RST);
+PCREL_OPT(LFS, PLFS, OPC_AND_RST);
+PCREL_OPT(LXSSP, PLXSSP, ONLY_RST);
+PCREL_OPT(LFD, PLFD, OPC_AND_RST);
+PCREL_OPT(LXSD, PLXSD, ONLY_RST);
+PCREL_OPT(LXV, PLXV, ST_STX28_TO5);
+PCREL_OPT(LXVP, PLXVP, OPC_AND_RST);
+
+PCREL_OPT(STB, PSTB, OPC_AND_RST);
+PCREL_OPT(STH, PSTH, OPC_AND_RST);
+PCREL_OPT(STW, PSTW, OPC_AND_RST);
+PCREL_OPT(STD, PSTD, ONLY_RST);
+PCREL_OPT(STFS, PSTFS, OPC_AND_RST);
+PCREL_OPT(STXSSP, PSTXSSP, ONLY_RST);
+PCREL_OPT(STFD, PSTFD, OPC_AND_RST);
+PCREL_OPT(STXSD, PSTXSD, ONLY_RST);
+PCREL_OPT(STXV, PSTXV, ST_STX28_TO5);
+PCREL_OPT(STXVP, PSTXVP, OPC_AND_RST);

diff  --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 1afeee02efb3..f043d1d4d30d 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -206,6 +206,7 @@ struct Configuration {
   bool thinLTOIndexOnly;
   bool timeTraceEnabled;
   bool tocOptimize;
+  bool pcRelOptimize;
   bool undefinedVersion;
   bool unique;
   bool useAndroidRelrTags = false;

diff  --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 12c8e5d7b059..34f2cd633e42 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -309,6 +309,9 @@ static void checkOptions() {
   if (config->tocOptimize && config->emachine != EM_PPC64)
     error("--toc-optimize is only supported on the PowerPC64 target");
 
+  if (config->pcRelOptimize && config->emachine != EM_PPC64)
+    error("--pcrel-optimize is only supported on the PowerPC64 target");
+
   if (config->pie && config->shared)
     error("-shared and -pie may not be used together");
 
@@ -1288,6 +1291,8 @@ static void setConfigs(opt::InputArgList &args) {
 
   config->tocOptimize =
       args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
+  config->pcRelOptimize =
+      args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
 }
 
 // Returns a value of "-format" option.

diff  --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 6440e8791e3a..914c4e0d5962 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -807,6 +807,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
   case R_PPC64_TOCBASE:
     return getPPC64TocBase() + a;
   case R_RELAX_GOT_PC:
+  case R_PPC64_RELAX_GOT_PC:
     return sym.getVA(a) - p;
   case R_RELAX_TLS_GD_TO_LE:
   case R_RELAX_TLS_IE_TO_LE:
@@ -1004,6 +1005,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) {
 void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
   assert(flags & SHF_ALLOC);
   const unsigned bits = config->wordsize * 8;
+  uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1);
 
   for (const Relocation &rel : relocations) {
     if (rel.expr == R_NONE)
@@ -1025,6 +1027,20 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
     case R_RELAX_GOT_PC_NOPIC:
       target->relaxGot(bufLoc, rel, targetVA);
       break;
+    case R_PPC64_RELAX_GOT_PC: {
+      // The R_PPC64_PCREL_OPT relocation must appear immediately after
+      // R_PPC64_GOT_PCREL34 in the relocations table at the same offset.
+      // We can only relax R_PPC64_PCREL_OPT if we have also relaxed
+      // the associated R_PPC64_GOT_PCREL34 since only the latter has an
+      // associated symbol. So save the offset when relaxing R_PPC64_GOT_PCREL34
+      // and only relax the other if the saved offset matches.
+      if (type == R_PPC64_GOT_PCREL34)
+        lastPPCRelaxedRelocOff = offset;
+      if (type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff)
+        break;
+      target->relaxGot(bufLoc, rel, targetVA);
+      break;
+    }
     case R_PPC64_RELAX_TOC:
       // rel.sym refers to the STT_SECTION symbol associated to the .toc input
       // section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC

diff  --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c3cadafdccd2..5563a956be56 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -404,6 +404,10 @@ defm toc_optimize : B<"toc-optimize",
     "(PowerPC64) Enable TOC related optimizations (default)",
     "(PowerPC64) Disable TOC related optimizations">;
 
+defm pcrel_optimize : B<"pcrel-optimize",
+    "(PowerPC64) Enable PC-relative optimizations (default)",
+    "(PowerPC64) Disable PC-relative optimizations">;
+
 def trace: F<"trace">, HelpText<"Print the names of the input files">;
 
 defm trace_symbol: Eq<"trace-symbol", "Trace references to symbols">;

diff  --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 751ded397768..7fc9b492cbe7 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -376,7 +376,7 @@ static bool needsGot(RelExpr expr) {
 static bool isRelExpr(RelExpr expr) {
   return oneof<R_PC, R_GOTREL, R_GOTPLTREL, R_MIPS_GOTREL, R_PPC64_CALL,
                R_PPC64_RELAX_TOC, R_AARCH64_PAGE_PC, R_RELAX_GOT_PC,
-               R_RISCV_PC_INDIRECT>(expr);
+               R_RISCV_PC_INDIRECT, R_PPC64_RELAX_GOT_PC>(expr);
 }
 
 // Returns true if a given relocation can be computed at link-time.

diff  --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index ec59c63410d0..4f48082b8be9 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -96,6 +96,7 @@ enum RelExpr {
   R_PPC64_CALL_PLT,
   R_PPC64_RELAX_TOC,
   R_PPC64_TOCBASE,
+  R_PPC64_RELAX_GOT_PC,
   R_RISCV_ADD,
   R_RISCV_PC_INDIRECT,
 };

diff  --git a/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
new file mode 100644
index 000000000000..c7936bf803fe
--- /dev/null
+++ b/lld/test/ELF/Inputs/ppc64-got-to-pcrel-relaxation-def.s
@@ -0,0 +1,66 @@
+	.section	".text"
+	.comm	storeVal_vector,8,8
+	.comm	useVal_vector,8,8
+	.globl storeVal_longlong, useAddr_longlong, useVal_longlong, storeVal_sshort
+	.globl useAddr_sshort, useVal_sshort, storeVal_sint, useAddr_sint, useVal_sint
+	.globl storeVal_double, useAddr_double, useVal_double, storeVal_float
+	.globl useAddr_float, useVal_float, storeVal_uint, storeVal_uint
+	.globl useVal_uint, storeVal_ushort, useAddr_ushort, useVal_ushort
+	.globl storeVal, useAddr, useVal
+	.section	".data"
+	.align 3
+	.type	storeVal_longlong, @object
+	.size	storeVal_longlong, 8
+storeVal_longlong:
+	.quad	18
+useAddr_longlong:
+	.quad	17
+useVal_longlong:
+	.quad	16
+storeVal_sshort:
+	.short	-15
+useAddr_sshort:
+	.short	-14
+useVal_sshort:
+	.short	-13
+	.zero	2
+storeVal_sint:
+	.long	-12
+useAddr_sint:
+	.long	-11
+useVal_sint:
+	.long	-10
+	.zero	4
+storeVal_double:
+	.long	858993459
+	.long	1076966195
+useAddr_double:
+	.long	-1717986918
+	.long	-1070589543
+useVal_double:
+	.long	0
+	.long	1076756480
+storeVal_float:
+	.long	1045220557
+useAddr_float:
+	.long	-1050568294
+useVal_float:
+	.long	1095761920
+storeVal_uint:
+	.long	12
+useAddr_uint:
+	.long	11
+useVal_uint:
+	.long	10
+storeVal_ushort:
+	.short	1
+useAddr_ushort:
+	.short	10
+useVal_ushort:
+	.short	5
+storeVal:
+	.byte	-1
+useAddr:
+	.byte	10
+useVal:
+	.byte	5

diff  --git a/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
new file mode 100644
index 000000000000..37fe65cfc3da
--- /dev/null
+++ b/lld/test/ELF/ppc64-got-to-pcrel-relaxation.s
@@ -0,0 +1,392 @@
+# REQUIRES: ppc
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t1.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %p/Inputs/ppc64-got-to-pcrel-relaxation-def.s -o %t2.o
+# RUN: ld.lld --shared %t2.o -o %t2.so --soname=t2
+# RUN: ld.lld %t1.o %t2.o -o %t
+# RUN: ld.lld %t1.o %t2.so -o %ts
+# RUN: ld.lld %t1.o %t2.o -o %tn --no-pcrel-optimize
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %t | FileCheck %s --check-prefix=CHECK-S
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %ts | FileCheck %s --check-prefix=CHECK-D
+# RUN: llvm-objdump -d --no-show-raw-insn --mcpu=pwr10 %tn | FileCheck %s --check-prefix=CHECK-D
+
+# CHECK-S-LABEL: <check_LBZ_STB>:
+# CHECK-S-NEXT:    plbz 10
+# CHECK-S-NEXT:    paddi 9
+# CHECK-S-NEXT:    li 3, 0
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    rldicl 9, 9, 9, 60
+# CHECK-S-NEXT:    add 9, 9, 10
+# CHECK-S-NEXT:    pstb 9
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LBZ_STB>:
+# CHECK-D-NEXT:    pld 8
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    li 3, 0
+# CHECK-D-NEXT:    lbz 10, 0(8)
+# CHECK-D-NEXT:    rldicl 9, 9, 9, 60
+# CHECK-D-NEXT:    add 9, 9, 10
+# CHECK-D-NEXT:    pld 10
+# CHECK-D-NEXT:    stb 9, 0(10)
+# CHECK-D-NEXT:    blr
+check_LBZ_STB:
+  pld 8,useVal@got@pcrel(0),1
+.Lpcrel1:
+  pld 9,useAddr@got@pcrel(0),1
+  li 3,0
+  .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
+  lbz 10,0(8)
+  rldicl 9,9,9,60
+  add 9,9,10
+  pld 10,storeVal@got@pcrel(0),1
+.Lpcrel2:
+  .reloc .Lpcrel2-8,R_PPC64_PCREL_OPT,.-(.Lpcrel2-8)
+  stb 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHZ_STH>:
+# CHECK-S-NEXT:    plhz 3
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    psth 3
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LHZ_STH>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lhz 3, 0(9)
+# CHECK-D-NEXT:    nop
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    sth 3, 0(9)
+# CHECK-D-NEXT:    blr
+check_LHZ_STH:
+  pld 9,useVal_ushort@got@pcrel(0),1
+.Lpcrel3:
+  .reloc .Lpcrel3-8,R_PPC64_PCREL_OPT,.-(.Lpcrel3-8)
+  lhz 3,0(9)
+  pld 9,storeVal_ushort@got@pcrel(0),1
+.Lpcrel4:
+  .reloc .Lpcrel4-8,R_PPC64_PCREL_OPT,.-(.Lpcrel4-8)
+  sth 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWZ_STW>:
+# CHECK-S-NEXT:    plwz 3
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstw 3
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LWZ_STW>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lwz 3, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stw 3, 0(9)
+# CHECK-D-NEXT:    blr
+check_LWZ_STW:
+  pld 9,useVal_uint@got@pcrel(0),1
+.Lpcrel5:
+  .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
+  lwz 3,0(9)
+  pld 9,storeVal_uint@got@pcrel(0),1
+.Lpcrel6:
+  .reloc .Lpcrel6-8,R_PPC64_PCREL_OPT,.-(.Lpcrel6-8)
+  stw 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFS_STFS>:
+# CHECK-S-NEXT:    plfs 1
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstfs 1
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LFS_STFS>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lfs 1, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stfs 1, 0(9)
+# CHECK-D-NEXT:    blr
+check_LFS_STFS:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel7:
+  .reloc .Lpcrel7-8,R_PPC64_PCREL_OPT,.-(.Lpcrel7-8)
+  lfs 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel8:
+  .reloc .Lpcrel8-8,R_PPC64_PCREL_OPT,.-(.Lpcrel8-8)
+  stfs 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LFD_STFD>:
+# CHECK-S-NEXT:    plfd 1
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstfd 1
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LFD_STFD>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lfd 1, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stfd 1, 0(9)
+# CHECK-D-NEXT:    blr
+check_LFD_STFD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel9:
+  .reloc .Lpcrel9-8,R_PPC64_PCREL_OPT,.-(.Lpcrel9-8)
+  lfd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel10:
+  .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
+  stfd 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LWA_STW>:
+# CHECK-S-NEXT:    mr 9, 3
+# CHECK-S-NEXT:    plwa 3
+# CHECK-S-NEXT:    pstw 9
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LWA_STW>:
+# CHECK-D-NEXT:    mr 9, 3
+# CHECK-D-NEXT:    pld 8
+# CHECK-D-NEXT:    pld 10
+# CHECK-D-NEXT:    lwa 3, 0(8)
+# CHECK-D-NEXT:    stw 9, 0(10)
+# CHECK-D-NEXT:    blr
+check_LWA_STW:
+  mr 9,3
+  pld 8,useVal_sint@got@pcrel(0),1
+.Lpcrel11:
+  pld 10,storeVal_sint@got@pcrel(0),1
+.Lpcrel12:
+  .reloc .Lpcrel11-8,R_PPC64_PCREL_OPT,.-(.Lpcrel11-8)
+  lwa 3,0(8)
+  .reloc .Lpcrel12-8,R_PPC64_PCREL_OPT,.-(.Lpcrel12-8)
+  stw 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LHA_STH>:
+# CHECK-S-NEXT:    mr 9, 3
+# CHECK-S-NEXT:    plha 3
+# CHECK-S-NEXT:    psth 9
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LHA_STH>:
+# CHECK-D-NEXT:    mr 9, 3
+# CHECK-D-NEXT:    pld 8
+# CHECK-D-NEXT:    pld 10
+# CHECK-D-NEXT:    lha 3, 0(8)
+# CHECK-D-NEXT:    sth 9, 0(10)
+# CHECK-D-NEXT:    blr
+check_LHA_STH:
+  mr 9,3
+  pld 8,useVal_sshort@got@pcrel(0),1
+.Lpcrel13:
+  pld 10,storeVal_sshort@got@pcrel(0),1
+.Lpcrel14:
+  .reloc .Lpcrel13-8,R_PPC64_PCREL_OPT,.-(.Lpcrel13-8)
+  lha 3,0(8)
+  .reloc .Lpcrel14-8,R_PPC64_PCREL_OPT,.-(.Lpcrel14-8)
+  sth 9,0(10)
+  blr
+
+# CHECK-S-LABEL: <check_LD_STD>:
+# CHECK-S-NEXT:    pld 3
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstd 3
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LD_STD>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    ld 3, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    std 3, 0(9)
+# CHECK-D-NEXT:    blr
+check_LD_STD:
+  pld 9,useVal_longlong@got@pcrel(0),1
+.Lpcrel15:
+  .reloc .Lpcrel15-8,R_PPC64_PCREL_OPT,.-(.Lpcrel15-8)
+  ld 3,0(9)
+  pld 9,storeVal_longlong@got@pcrel(0),1
+.Lpcrel16:
+  .reloc .Lpcrel16-8,R_PPC64_PCREL_OPT,.-(.Lpcrel16-8)
+  std 3,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXV_STXV>:
+# CHECK-S-NEXT:    plxv 34
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstxv 34
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LXV_STXV>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lxv 34, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stxv 34, 0(9)
+# CHECK-D-NEXT:    blr
+check_LXV_STXV:
+  pld 9,useVal_vector@got@pcrel(0),1
+.Lpcrel17:
+  .reloc .Lpcrel17-8,R_PPC64_PCREL_OPT,.-(.Lpcrel17-8)
+  lxv 34,0(9)
+  pld 9,storeVal_vector@got@pcrel(0),1
+.Lpcrel18:
+  .reloc .Lpcrel18-8,R_PPC64_PCREL_OPT,.-(.Lpcrel18-8)
+  stxv 34,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-S-NEXT:    plxssp 1
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstxssp 1
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LXSSP_STXSSP>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lxssp 1, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stxssp 1, 0(9)
+# CHECK-D-NEXT:    blr
+check_LXSSP_STXSSP:
+  pld 9,useVal_float@got@pcrel(0),1
+.Lpcrel19:
+  .reloc .Lpcrel19-8,R_PPC64_PCREL_OPT,.-(.Lpcrel19-8)
+  lxssp 1,0(9)
+  pld 9,storeVal_float@got@pcrel(0),1
+.Lpcrel20:
+  .reloc .Lpcrel20-8,R_PPC64_PCREL_OPT,.-(.Lpcrel20-8)
+  stxssp 1,0(9)
+  blr
+
+# CHECK-S-LABEL: <check_LXSD_STXSD>:
+# CHECK-S-NEXT:    plxsd 1, [[#ADDR1:]]
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstxsd 1, [[#ADDR2:]]
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lxsd 1, 0(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stxsd 1, 0(9)
+# CHECK-D-NEXT:    blr
+check_LXSD_STXSD:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel21:
+  .reloc .Lpcrel21-8,R_PPC64_PCREL_OPT,.-(.Lpcrel21-8)
+  lxsd 1,0(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel22:
+  .reloc .Lpcrel22-8,R_PPC64_PCREL_OPT,.-(.Lpcrel22-8)
+  stxsd 1,0(9)
+  blr
+
+# The respective displacements are computed relative to the PC, which advanced
+# by 28 bytes in this function. Since the displacements in the two access
+# instructions are 8 and 32, the displacements are those computed above minus
+# 20 and plus 4 (+8 - 28 and +32 - 28) respectively.
+# CHECK-S-LABEL: <check_LXSD_STXSD_aggr>:
+# CHECK-S-NEXT:    plxsd 1, [[#ADDR1-20]]
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    pstxsd 1, [[#ADDR2+4]]
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD_aggr>:
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    lxsd 1, 8(9)
+# CHECK-D-NEXT:    pld 9
+# CHECK-D-NEXT:    stxsd 1, 32(9)
+# CHECK-D-NEXT:    blr
+check_LXSD_STXSD_aggr:
+  pld 9,useVal_double@got@pcrel(0),1
+.Lpcrel23:
+  .reloc .Lpcrel23-8,R_PPC64_PCREL_OPT,.-(.Lpcrel23-8)
+  lxsd 1,8(9)
+  pld 9,storeVal_double@got@pcrel(0),1
+.Lpcrel24:
+  .reloc .Lpcrel24-8,R_PPC64_PCREL_OPT,.-(.Lpcrel24-8)
+  stxsd 1,32(9)
+  blr
+
+# This includes a nop but that is not emitted by the linker.
+# It is an alignment nop to prevent the prefixed instruction from
+# crossing a 64-byte boundary.
+# CHECK-S-LABEL: <check_LD_STD_W_PADDI>:
+# CHECK-S-NEXT:    paddi 9
+# CHECK-S-NEXT:    ld 3, 0(9)
+# CHECK-S-NEXT:    nop
+# CHECK-S-NEXT:    paddi 9
+# CHECK-S-NEXT:    std 3, 0(9)
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LD_STD_W_PADDI>:
+# CHECK-D-NEXT:    paddi 9
+# CHECK-D-NEXT:    ld 3, 0(9)
+# CHECK-D-NEXT:    nop
+# CHECK-D-NEXT:    paddi 9
+# CHECK-D-NEXT:    std 3, 0(9)
+# CHECK-D-NEXT:    blr
+check_LD_STD_W_PADDI:
+  paddi 9,0,useVal_longlong@got@pcrel,1
+.Lpcrel25:
+  .reloc .Lpcrel25-8,R_PPC64_PCREL_OPT,.-(.Lpcrel25-8)
+  ld 3,0(9)
+  paddi 9,0,storeVal_longlong@got@pcrel,1
+.Lpcrel26:
+  .reloc .Lpcrel26-8,R_PPC64_PCREL_OPT,.-(.Lpcrel26-8)
+  std 3,0(9)
+  blr
+# CHECK-S-LABEL: <check_LXSD_STXSD_aggr_notoc>:
+# CHECK-S-NEXT:    paddi 3, 0, -12, 1
+# CHECK-S-NEXT:    lwz 4, 8(3)
+# CHECK-S-NEXT:    paddi 3, 0, -24, 1
+# CHECK-S-NEXT:    stw 4, 32(3)
+# CHECK-S-NEXT:    blr
+
+# CHECK-D-LABEL: <check_LXSD_STXSD_aggr_notoc>:
+# CHECK-D-NEXT:    paddi 3, 0, -12, 1
+# CHECK-D-NEXT:    lwz 4, 8(3)
+# CHECK-D-NEXT:    paddi 3, 0, -24, 1
+# CHECK-D-NEXT:    stw 4, 32(3)
+# CHECK-D-NEXT:    blr
+.type	Arr,@object                     # @Arr
+.globl	Arr
+.p2align	2
+Arr:
+.long	11                              # 0xb
+.long	22                              # 0x16
+.long	33                              # 0x21
+check_LXSD_STXSD_aggr_notoc:
+  paddi 3, 0, Arr@PCREL, 1
+.Lpcrel27:
+  .reloc .Lpcrel27-8,R_PPC64_PCREL_OPT,.-(.Lpcrel27-8)
+  lwz 4,8(3)
+  paddi 3, 0, Arr@PCREL, 1
+.Lpcrel28:
+  .reloc .Lpcrel28-8,R_PPC64_PCREL_OPT,.-(.Lpcrel28-8)
+  stw 4,32(3)
+  blr
+


        


More information about the llvm-commits mailing list