[llvm-branch-commits] [lld] 6f8ad14 - [ELF][PPC32] Support canonical PLT

Fangrui Song via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Jan 25 17:57:40 PST 2020


Author: Fangrui Song
Date: 2020-01-25T17:57:26-08:00
New Revision: 6f8ad14521ec2983f2398bae32a717842e2cf313

URL: https://github.com/llvm/llvm-project/commit/6f8ad14521ec2983f2398bae32a717842e2cf313
DIFF: https://github.com/llvm/llvm-project/commit/6f8ad14521ec2983f2398bae32a717842e2cf313.diff

LOG: [ELF][PPC32] Support canonical PLT

-fno-pie produces a pair of non-GOT-non-PLT relocations R_PPC_ADDR16_{HA,LO} (R_ABS) referencing external
functions.

```
lis 3, func@ha
la 3, func@l(3)
```

In a -no-pie/-pie link, if func is not defined in the executable, a canonical PLT entry (st_value>0, st_shndx=0) will be needed.
References to func in shared objects will be resolved to this address.
-fno-pie -pie should fail with "can't create dynamic relocation ... against ...", so we just need to think about -no-pie.

On x86: the PLT entry passes the JMP_SLOT offset to the rtld PLT resolver.
On x86-64: the PLT entry passes the JUMP_SLOT index to the rtld PLT resolver.
On ARM/AArch64: the PLT entry passes &.got.plt[n]. The PLT header passes &.got.plt[fixed-index]. The rtld PLT resolver can compute the JUMP_SLOT index from the two addresses.

For these targets, the canonical PLT entry can just reuse the regular PLT entry (in PltSection).

On PPC32: PltSection (.glink) consists of `b PLTresolve` instructions and `PLTresolve`. The rtld PLT resolver depends on r11 having been set up to the .plt (GotPltSection) entry.
On PPC64 ELFv2: PltSection (.glink) consists of `__glink_PLTresolve` and `bl __glink_PLTresolve`. The rtld PLT resolver depends on r12 having been set up to the .plt (GotPltSection) entry.

We cannot reuse a `b PLTresolve`/`bl __glink_PLTresolve` in PltSection as a canonical PLT entry. PPC64 ELFv2 avoids the problem by using TOC for any external reference, even in non-pic code, so the canonical PLT entry scenario should not happen in the first place.
For PPC32, we have to create a PLT call stub as the canonical PLT entry. The code sequence sets up r11.

Reviewed By: Bdragon28

Differential Revision: https://reviews.llvm.org/D73399

(cherry picked from commit 837e8a9c0cd097034e023dfba146d17ce132998c)

Added: 
    lld/test/ELF/ppc32-canonical-plt.s

Modified: 
    lld/ELF/Arch/PPC.cpp
    lld/ELF/Relocations.cpp
    lld/ELF/SyntheticSections.cpp
    lld/ELF/SyntheticSections.h

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp
index 1d4e80184dcd..e2ad92de737e 100644
--- a/lld/ELF/Arch/PPC.cpp
+++ b/lld/ELF/Arch/PPC.cpp
@@ -67,6 +67,18 @@ static void writeFromHalf16(uint8_t *loc, uint32_t insn) {
 }
 
 void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
+  // Create canonical PLT entries for non-PIE code. Compilers don't generate
+  // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE.
+  uint32_t glink = in.plt->getVA(); // VA of .glink
+  if (!config->isPic) {
+    for (const Symbol *sym : in.plt->entries)
+      if (sym->needsPltAddr) {
+        writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0);
+        buf += 16;
+        glink += 16;
+      }
+  }
+
   // On PPC Secure PLT ABI, bl foo@plt jumps to a call stub, which loads an
   // absolute address from a specific .plt slot (usually called .got.plt on
   // other targets) and jumps there.
@@ -85,15 +97,14 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
   // computes the PLT index (by computing the distance from the landing b to
   // itself) and calls _dl_runtime_resolve() (in glibc).
   uint32_t got = in.got->getVA();
-  uint32_t glink = in.plt->getVA(); // VA of .glink
   const uint8_t *end = buf + 64;
   if (config->isPic) {
-    uint32_t afterBcl = in.plt->getSize() - target->pltHeaderSize + 12;
+    uint32_t afterBcl = 4 * in.plt->getNumEntries() + 12;
     uint32_t gotBcl = got + 4 - (glink + afterBcl);
     write32(buf + 0, 0x3d6b0000 | ha(afterBcl));  // addis r11,r11,1f-glink@ha
     write32(buf + 4, 0x7c0802a6);                 // mflr r0
     write32(buf + 8, 0x429f0005);                 // bcl 20,30,.+4
-    write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-.glink@l
+    write32(buf + 12, 0x396b0000 | lo(afterBcl)); // 1: addi r11,r11,1b-glink@l
     write32(buf + 16, 0x7d8802a6);                // mflr r12
     write32(buf + 20, 0x7c0803a6);                // mtlr r0
     write32(buf + 24, 0x7d6c5850);                // sub r11,r11,r12
@@ -113,16 +124,16 @@ void writePPC32GlinkSection(uint8_t *buf, size_t numEntries) {
     buf += 56;
   } else {
     write32(buf + 0, 0x3d800000 | ha(got + 4));   // lis     r12,GOT+4@ha
-    write32(buf + 4, 0x3d6b0000 | ha(-glink));    // addis   r11,r11,-Glink@ha
+    write32(buf + 4, 0x3d6b0000 | ha(-glink));    // addis   r11,r11,-glink@ha
     if (ha(got + 4) == ha(got + 8))
       write32(buf + 8, 0x800c0000 | lo(got + 4)); // lwz r0,GOT+4@l(r12)
     else
       write32(buf + 8, 0x840c0000 | lo(got + 4)); // lwzu r0,GOT+4@l(r12)
-    write32(buf + 12, 0x396b0000 | lo(-glink));   // addi    r11,r11,-Glink@l
+    write32(buf + 12, 0x396b0000 | lo(-glink));   // addi    r11,r11,-glink@l
     write32(buf + 16, 0x7c0903a6);                // mtctr   r0
     write32(buf + 20, 0x7c0b5a14);                // add     r0,r11,r11
     if (ha(got + 4) == ha(got + 8))
-      write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@ha(r12)
+      write32(buf + 24, 0x818c0000 | lo(got + 8)); // lwz r12,GOT+8@l(r12)
     else
       write32(buf + 24, 0x818c0000 | 4);          // lwz r12,4(r12)
     write32(buf + 28, 0x7d605a14);                // add     r11,r0,r11
@@ -146,7 +157,7 @@ PPC::PPC() {
   gotBaseSymInGotPlt = false;
   gotHeaderEntriesNum = 3;
   gotPltHeaderEntriesNum = 0;
-  pltHeaderSize = 64; // size of PLTresolve in .glink
+  pltHeaderSize = 0;
   pltEntrySize = 4;
   ipltEntrySize = 16;
 
@@ -178,7 +189,7 @@ void PPC::writeGotHeader(uint8_t *buf) const {
 
 void PPC::writeGotPlt(uint8_t *buf, const Symbol &s) const {
   // Address of the symbol resolver stub in .glink .
-  write32(buf, in.plt->getVA() + 4 * s.pltIndex);
+  write32(buf, in.plt->getVA() + in.plt->headerSize + 4 * s.pltIndex);
 }
 
 bool PPC::needsThunk(RelExpr expr, RelType type, const InputFile *file,

diff  --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index ced9991f2003..d826d5d0663d 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1198,10 +1198,16 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type,
                     getLocation(sec, sym, offset));
       if (!sym.isInPlt())
         addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym);
-      if (!sym.isDefined())
+      if (!sym.isDefined()) {
         replaceWithDefined(
             sym, in.plt,
             target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0);
+        if (config->emachine == EM_PPC) {
+          // PPC32 canonical PLT entries are at the beginning of .glink
+          cast<Defined>(sym).value = in.plt->headerSize;
+          in.plt->headerSize += 16;
+        }
+      }
       sym.needsPltAddr = true;
       sec.relocations.push_back({expr, type, offset, addend, &sym});
       return;

diff  --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 550a5b38b89b..21cbfac46468 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2449,6 +2449,9 @@ PltSection::PltSection()
   if (config->emachine == EM_PPC || config->emachine == EM_PPC64) {
     name = ".glink";
     alignment = 4;
+    // PLTresolve is at the end.
+    if (config->emachine == EM_PPC)
+      footerSize = 64;
   }
 
   // On x86 when IBT is enabled, this section contains the second PLT (lazy
@@ -2486,7 +2489,7 @@ void PltSection::addEntry(Symbol &sym) {
 }
 
 size_t PltSection::getSize() const {
-  return headerSize + entries.size() * target->pltEntrySize;
+  return headerSize + entries.size() * target->pltEntrySize + footerSize;
 }
 
 bool PltSection::isNeeded() const {

diff  --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index f0a598dda51d..65f9aabdc13f 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -683,9 +683,9 @@ class PltSection : public SyntheticSection {
   void addEntry(Symbol &sym);
   size_t getNumEntries() const { return entries.size(); }
 
-  size_t headerSize = 0;
+  size_t headerSize;
+  size_t footerSize = 0;
 
-private:
   std::vector<const Symbol *> entries;
 };
 

diff  --git a/lld/test/ELF/ppc32-canonical-plt.s b/lld/test/ELF/ppc32-canonical-plt.s
new file mode 100644
index 000000000000..371aa7325b2d
--- /dev/null
+++ b/lld/test/ELF/ppc32-canonical-plt.s
@@ -0,0 +1,72 @@
+# REQUIRES: ppc
+
+## Test that we create canonical PLT entries for -no-pie.
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc %p/Inputs/canonical-plt-pcrel.s -o %t1.o
+# RUN: ld.lld %t1.o -o %t1.so -shared -soname=so
+
+# RUN: ld.lld %t.o %t1.so -o %t
+# RUN: llvm-readobj -r %t | FileCheck --check-prefix=REL %s
+# RUN: llvm-readelf -S -s %t | FileCheck --check-prefix=SYM %s
+# RUN: llvm-readelf -x .plt %t | FileCheck --check-prefix=HEX %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+# REL:      Relocations [
+# REL-NEXT:   .rela.plt {
+# REL-NEXT:     R_PPC_JMP_SLOT func 0x0
+# REL-NEXT:     R_PPC_JMP_SLOT ifunc 0x0
+# REL-NEXT:   }
+# REL-NEXT: ]
+
+# SYM: .glink PROGBITS 100101dc
+
+## st_value points to the canonical PLT entry in .glink
+# SYM: Symbol table '.dynsym'
+# SYM: 100101dc 0 FUNC GLOBAL DEFAULT UND func
+# SYM: 100101ec 0 FUNC GLOBAL DEFAULT UND ifunc
+# SYM: Symbol table '.symtab'
+# SYM: 100101dc 0 FUNC GLOBAL DEFAULT UND func
+# SYM: 100101ec 0 FUNC GLOBAL DEFAULT UND ifunc
+
+# HEX: 0x100302b4 100101fc 10010200
+
+## Canonical PLT entry of func.
+## 0x100101dc + 4*2 + 64 = 0x10010224
+## 0x100302b4 = 65536*4099+692
+# CHECK:      100101dc .glink:
+# CHECK-NEXT:           lis 11, 4099
+# CHECK-NEXT:           lwz 11, 692(11)
+# CHECK-NEXT:           mtctr 11
+# CHECK-NEXT:           bctr
+
+## Canonical PLT entry of ifunc.
+## 0x100302b8 = 65536*4099+696
+# CHECK-NEXT: 100101ec: lis 11, 4099
+# CHECK-NEXT:           lwz 11, 696(11)
+# CHECK-NEXT:           mtctr 11
+# CHECK-NEXT:           bctr
+
+## The 2 b instructions are referenced by .plt entries.
+# CHECK-NEXT: 100101fc: b .+8
+# CHECK-NEXT:           b .+4
+
+## PLTresolve of 64 bytes is at the end.
+## Operands of addis & addi: -0x100101fc = 65536*-4097-508
+# CHECK-NEXT:           lis 12, 0
+# CHECK-NEXT:           addis 11, 11, -4097
+# CHECK-NEXT:           lwz 0, 4(12)
+# CHECK-NEXT:           addi 11, 11, -508
+# CHECK-NEXT:           mtctr 0
+# CHECK-NEXT:           add 0, 11, 11
+# CHECK-NEXT:           lwz 12, 8(12)
+# CHECK-NEXT:           add 11, 0, 11
+# CHECK-NEXT:           bctr
+# CHECK-COUNT-7:        nop
+
+.globl _start
+_start:
+  lis 3, func@ha
+  la 3, func@l(3)
+  lis 4, ifunc@ha
+  la 4, ifunc@l(4)


        


More information about the llvm-branch-commits mailing list