[lld] d224175 - [PowerPC][LLD] Extend R2 save stub to support offsets of more than 26 bits

Stefan Pintilie via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 25 04:39:20 PDT 2020


Author: Stefan Pintilie
Date: 2020-09-25T06:39:14-05:00
New Revision: d224175230d1ab232cfdf71f9da63a732f405c91

URL: https://github.com/llvm/llvm-project/commit/d224175230d1ab232cfdf71f9da63a732f405c91
DIFF: https://github.com/llvm/llvm-project/commit/d224175230d1ab232cfdf71f9da63a732f405c91.diff

LOG: [PowerPC][LLD] Extend R2 save stub to support offsets of more than 26 bits

The R2 save stub will now support offsets up to 64 bits.

There are three cases that will be used.
1) The offset fits in 26 bits.
```
b <26 bit offset>
```
2) The offset does not fit in 26 bits but fits in 34 bits.
```
paddi r12, 0, <34 bit offset>, 1
mtctr r12
bctr
```
3) The offset does not fit in 34 bits. Since this is an R2 save stub we can use
the TOC in R2. We are not loading the offset but the actual address we want to
branch to.
```
addis r12, r2, <address in TOC hi>
ld r12, <address in TOC lo>(r12)
mtctr r12
bctr
```

In case 1) the stub is only 8 bytes while in cases 2) and 3) the stub will be
20 bytes.

Reviewed By: MaskRay, sfertile, NeHuang

Differential Revision: https://reviews.llvm.org/D87916

Added: 
    

Modified: 
    lld/ELF/Thunks.cpp
    lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s

Removed: 
    


################################################################################
diff  --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index eada4254a3c2..6ac895051389 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -289,10 +289,33 @@ class PPC64PltCallStub final : public Thunk {
 // 2) Tail calls the callee.
 class PPC64R2SaveStub final : public Thunk {
 public:
-  PPC64R2SaveStub(Symbol &dest) : Thunk(dest, 0) {}
-  uint32_t size() override { return 8; }
+  PPC64R2SaveStub(Symbol &dest, int64_t addend) : Thunk(dest, addend) {
+    alignment = 16;
+  }
+
+  // To prevent oscillations in layout when moving from short to long thunks
+  // we make sure that once a thunk has been set to long it cannot go back.
+  bool getMayUseShortThunk() {
+    if (!mayUseShortThunk)
+      return false;
+    if (!isInt<26>(computeOffset())) {
+      mayUseShortThunk = false;
+      return false;
+    }
+    return true;
+  }
+  uint32_t size() override { return getMayUseShortThunk() ? 8 : 20; }
   void writeTo(uint8_t *buf) override;
   void addSymbols(ThunkSection &isec) override;
+
+private:
+  // Transitioning from long to short can create layout oscillations in
+  // certain corner cases which would prevent the layout from converging.
+  // This is similar to the handling for ARMThunk.
+  bool mayUseShortThunk = true;
+  int64_t computeOffset() const {
+    return destination.getVA() - (getThunkTargetSym()->getVA() + 4);
+  }
 };
 
 // PPC64 R12 Setup Stub
@@ -893,12 +916,25 @@ bool PPC64PltCallStub::isCompatibleWith(const InputSection &isec,
 }
 
 void PPC64R2SaveStub::writeTo(uint8_t *buf) {
-  int64_t offset = destination.getVA() - (getThunkTargetSym()->getVA() + 4);
-  // The branch offset needs to fit in 26 bits.
-  if (!isInt<26>(offset))
-    reportRangeError(buf, offset, 26, destination, "R2 save stub offset");
+  const int64_t offset = computeOffset();
   write32(buf + 0, 0xf8410018);                         // std  r2,24(r1)
-  write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b    <offset>
+  // The branch offset needs to fit in 26 bits.
+  if (getMayUseShortThunk()) {
+    write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b    <offset>
+  } else if (isInt<34>(offset)) {
+    const uint64_t paddi = PADDI_R12_NO_DISP |
+                           (((offset >> 16) & 0x3ffff) << 32) |
+                           (offset & 0xffff);
+    writePrefixedInstruction(buf + 4, paddi); // paddi r12, 0, func@pcrel, 1
+    write32(buf + 12, MTCTR_R12);             // mtctr r12
+    write32(buf + 16, BCTR);                  // bctr
+  } else {
+    in.ppc64LongBranchTarget->addEntry(&destination, addend);
+    const int64_t offsetFromTOC =
+        in.ppc64LongBranchTarget->getEntryVA(&destination, addend) -
+        getPPC64TocBase();
+    writePPC64LoadAndBranch(buf + 4, offsetFromTOC);
+  }
 }
 
 void PPC64R2SaveStub::addSymbols(ThunkSection &isec) {
@@ -1109,7 +1145,7 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
   // then the callee clobbers the TOC and we need an R2 save stub when RelType
   // is R_PPC64_REL14 or R_PPC64_REL24.
   if ((type == R_PPC64_REL14 || type == R_PPC64_REL24) && (s.stOther >> 5) == 1)
-    return make<PPC64R2SaveStub>(s);
+    return make<PPC64R2SaveStub>(s, a);
 
   if (type == R_PPC64_REL24_NOTOC)
     return (s.stOther >> 5) > 1

diff  --git a/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s b/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s
index 4175ba313108..5f03ab13dfb4 100644
--- a/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s
+++ b/lld/test/ELF/ppc64-toc-call-to-pcrel-long-jump.s
@@ -1,36 +1,111 @@
 # REQUIRES: ppc
-# RUN: echo 'SECTIONS { \
-# RUN:       .text_callee 0x10010000 : { *(.text_callee) } \
-# RUN:       .text_caller 0x20020000 : { *(.text_caller) } \
-# RUN:       }' > %t.script
+# RUN: split-file %s %t
 
-# RUN: llvm-mc -filetype=obj -triple=powerpc64le %s -o %t.o
-# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/asm -o %t.o
+# RUN: ld.lld -T %t/lts %t.o -o %t_le
+# RUN: llvm-objdump --mcpu=pwr10 --no-show-raw-insn -d %t_le | FileCheck %s
+# RUN: llvm-readelf -s %t_le | FileCheck %s --check-prefix=SYM
 
-# RUN: llvm-mc -filetype=obj -triple=powerpc64 %s -o %t.o
-# RUN: not ld.lld -T %t.script %t.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple=powerpc64 %t/asm -o %t.o
+# RUN: ld.lld -T %t/lts %t.o -o %t_be
+# RUN: llvm-objdump --mcpu=pwr10 --no-show-raw-insn -d %t_be | FileCheck %s
+# RUN: llvm-readelf -s %t_be | FileCheck %s --check-prefix=SYM
 
-# CHECK:      error: R2 save stub offset is out of range: -268501028 is not in [-33554432, 33554431]; references callee
-# CHECK-NEXT: >>> defined in {{.*}}.o
+# SYM:      Symbol table '.symtab' contains 9 entries:
+# SYM:      1: 0000000010010000     0 NOTYPE  LOCAL  DEFAULT [<other: 0x20>]   1 callee
+# SYM-NEXT: 2: 0000000020020008     0 NOTYPE  LOCAL  DEFAULT [<other: 0x60>]   3 caller
+# SYM-NEXT: 3: 0000000010020008     0 NOTYPE  LOCAL  DEFAULT                  2 caller_close
+# SYM-NEXT: 4: 0000000520020008     0 NOTYPE  LOCAL  DEFAULT                  4 caller_far
+# SYM-NEXT: 5: 0000000520028038     0 NOTYPE  LOCAL  HIDDEN                   6 .TOC.
+# SYM-NEXT: 6: 0000000010020020     8 FUNC    LOCAL  DEFAULT                  2 __toc_save_callee
+# SYM-NEXT: 7: 0000000020020020    20 FUNC    LOCAL  DEFAULT                  3 __toc_save_callee
+# SYM-NEXT: 8: 0000000520020020    20 FUNC    LOCAL  DEFAULT                  4 __toc_save_callee
 
-# RUN: ld.lld -T %t.script %t.o -o /dev/null --noinhibit-exec
+#--- lts
+PHDRS {
+  callee PT_LOAD FLAGS(0x1 | 0x4);
+  close PT_LOAD FLAGS(0x1 | 0x4);
+  caller PT_LOAD FLAGS(0x1 | 0x4);
+  far PT_LOAD FLAGS(0x1 | 0x4);
+}
+SECTIONS {
+  .text_callee 0x10010000 : { *(.text_callee) } :callee
+  .text_caller_close 0x10020000 : { *(.text_caller_close) } :close
+  .text_caller 0x20020000 : { *(.text_caller) } :caller
+  .text_caller_far 0x520020000 : { *(.text_caller_far) } :far
+}
 
+#--- asm
+# CHECK-LABEL: <callee>:
+# CHECK:         blr
 .section .text_callee, "ax", %progbits
 callee:
   .localentry callee, 1
   blr
 
-.section .text_caller, "ax", %progbits
-caller:
+# CHECK-LABEL: <caller_close>:
+# CHECK:         bl 0x10020020
+# CHECK-NEXT:    ld 2, 24(1)
+# CHECK-NEXT:    blr
+# CHECK-LABEL: <__toc_save_callee>:
+# CHECK:         std 2, 24(1)
+# CHECK-NEXT:    b 0x10010000
+.section .text_caller_close, "ax", %progbits
+.Lfunc_toc1:
+  .quad .TOC.-.Lfunc_gep1
+caller_close:
 .Lfunc_gep1:
-  addis 2, 12, .TOC.-.Lfunc_gep1@ha
-  addi 2, 2, .TOC.-.Lfunc_gep1@l
+  ld 2, .Lfunc_toc1-.Lfunc_gep1(12)
+  add 2, 2, 12
 .Lfunc_lep1:
   .localentry caller, .Lfunc_lep1-.Lfunc_gep1
-  addis 30, 2, global@toc@ha
-  lwz 3, global@toc@l(30)
   bl callee
   nop
   blr
-global:
-  .long	0
+
+# CHECK-LABEL: <caller>:
+# CHECK:         bl 0x20020020
+# CHECK-NEXT:    ld 2, 24(1)
+# CHECK-NEXT:    blr
+# CHECK-LABEL: <__toc_save_callee>:
+# CHECK:         std 2, 24(1)
+# CHECK-NEXT:    paddi 12, 0, -268501028, 1
+# CHECK-NEXT:    mtctr 12
+# CHECK-NEXT:    bctr
+.section .text_caller, "ax", %progbits
+.Lfunc_toc2:
+  .quad .TOC.-.Lfunc_gep2
+caller:
+.Lfunc_gep2:
+  ld 2, .Lfunc_toc2-.Lfunc_gep2(12)
+  add 2, 2, 12
+.Lfunc_lep2:
+  .localentry caller, .Lfunc_lep2-.Lfunc_gep2
+  bl callee
+  nop
+  blr
+
+# CHECK-LABEL: <caller_far>:
+# CHECK:         ld 2, -8(12)
+# CHECK-NEXT:    add 2, 2, 12
+# CHECK-NEXT:    bl 0x520020020
+# CHECK-NEXT:    ld 2, 24(1)
+# CHECK-NEXT:    blr
+# CHECK-LABEL: <__toc_save_callee>:
+# CHECK:         std 2, 24(1)
+# CHECK-NEXT:    addis 12, 2, 0
+# CHECK-NEXT:    ld 12, -32760(12)
+# CHECK-NEXT:    mtctr 12
+# CHECK-NEXT:    bctr
+.section .text_caller_far, "ax", %progbits
+.Lfunc_toc3:
+  .quad .TOC.-.Lfunc_gep3
+caller_far:
+.Lfunc_gep3:
+  ld 2, .Lfunc_toc3-.Lfunc_gep3(12)
+  add 2, 2, 12
+.Lfunc_lep3:
+  .localentry caller, .Lfunc_lep3-.Lfunc_gep3
+  bl callee
+  nop
+  blr


        


More information about the llvm-commits mailing list