[PATCH] D34689: [LLD][ELF] Pre-create ThunkSections at Target specific intervals

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 14 11:49:20 PDT 2017


Why is this based on output sections?

If the idea is to have regularly spaced sections, it seems it should look
at executable PT_LOADs, find the offsets where the sections should be
and only then walk the input sections trying to figure out where to put
them.

If it is because it is easier to implement like this and a sufficiently
good heuristic, that is fine :-)

Cheers,
Rafael

Peter Smith via Phabricator <reviews at reviews.llvm.org> writes:

> peter.smith updated this revision to Diff 106435.
> peter.smith added a comment.
>
> Updated diff to return nullptr from ThunkSection::getTargetInputSection() when there are no Thunks.
>
>
> https://reviews.llvm.org/D34689
>
> Files:
>   ELF/Arch/ARM.cpp
>   ELF/Relocations.cpp
>   ELF/Relocations.h
>   ELF/SyntheticSections.cpp
>   ELF/Target.h
>   test/ELF/arm-thumb-thunk-symbols.s
>
> Index: test/ELF/arm-thumb-thunk-symbols.s
> ===================================================================
> --- test/ELF/arm-thumb-thunk-symbols.s
> +++ test/ELF/arm-thumb-thunk-symbols.s
> @@ -25,18 +25,18 @@
>   b thumb_fn
>  
>  // CHECK:     Name: __Thumbv7ABSLongThunk_arm_fn
> -// CHECK-NEXT:     Value: 0x11005
> +// CHECK-NEXT:     Value: 0x12005
>  // CHECK-NEXT:     Size: 10
>  // CHECK-NEXT:    Binding: Local (0x0)
>  // CHECK-NEXT:    Type: Function (0x2)
>  // CHECK:     Name: __ARMv7ABSLongThunk_thumb_fn
> -// CHECK-NEXT:     Value: 0x11010
> +// CHECK-NEXT:     Value: 0x12010
>  // CHECK-NEXT:     Size: 12
>  // CHECK-NEXT:    Binding: Local (0x0)
>  // CHECK-NEXT:    Type: Function (0x2)
>  
>  // CHECK-PI:     Name: __ThumbV7PILongThunk_arm_fn
> -// CHECK-PI-NEXT:     Value: 0x1005
> +// CHECK-PI-NEXT:     Value: 0x2005
>  // CHECK-PI-NEXT:     Size: 12
>  // CHECK-PI-NEXT:    Binding: Local (0x0)
>  // CHECK-PI-NEXT:    Type: Function (0x2)
> Index: ELF/Target.h
> ===================================================================
> --- ELF/Target.h
> +++ ELF/Target.h
> @@ -70,6 +70,13 @@
>    // end of .got
>    uint64_t GotBaseSymOff = 0;
>  
> +  // On systems with range extensions we place collections of Thunks at
> +  // regular spacings that enable the majority of branches to reach the Thunks.
> +  uint32_t ThunkSectionSpacing = 0;
> +
> +  // An estimate of size of the Thunks that will be created per ThunkSection
> +  uint32_t ThunkSectionSize = 0;
> +
>    uint32_t CopyRel;
>    uint32_t GotRel;
>    uint32_t PltRel;
> Index: ELF/SyntheticSections.cpp
> ===================================================================
> --- ELF/SyntheticSections.cpp
> +++ ELF/SyntheticSections.cpp
> @@ -2325,6 +2325,8 @@
>  }
>  
>  InputSection *ThunkSection::getTargetInputSection() const {
> +  if (Thunks.empty())
> +    return nullptr;
>    const Thunk *T = Thunks.front();
>    return T->getTargetInputSection();
>  }
> Index: ELF/Relocations.h
> ===================================================================
> --- ELF/Relocations.h
> +++ ELF/Relocations.h
> @@ -133,14 +133,14 @@
>  
>  private:
>    void mergeThunks();
> -  ThunkSection *getOSThunkSec(OutputSection *OS,
> -                              std::vector<InputSection *> *ISR);
> +  ThunkSection *getISRThunkSec(OutputSection *OS,
> +                               std::vector<InputSection *> *ISR);
>    ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS);
> -  void forEachExecInputSection(
> +  void
> +  createInitialThunkSections(ArrayRef<OutputSectionCommand *> OutputSections);
> +  void forEachExecInputSectionRange(
>        ArrayRef<OutputSectionCommand *> OutputSections,
> -      std::function<void(OutputSection *, std::vector<InputSection *> *,
> -                         InputSection *)>
> -          Fn);
> +      std::function<void(OutputSection *, std::vector<InputSection *> *)> Fn);
>    std::pair<Thunk *, bool> getThunk(SymbolBody &Body, uint32_t Type);
>    ThunkSection *addThunkSection(OutputSection *OS,
>                                  std::vector<InputSection *> *, uint64_t Off);
> @@ -163,9 +163,6 @@
>    // passes
>    std::map<std::vector<InputSection *> *, std::vector<ThunkSection *>>
>        ThunkSections;
> -
> -  // The ThunkSection for this vector of InputSections
> -  ThunkSection *CurTS;
>  };
>  
>  // Return a int64_t to make sure we get the sign extension out of the way as
> Index: ELF/Relocations.cpp
> ===================================================================
> --- ELF/Relocations.cpp
> +++ ELF/Relocations.cpp
> @@ -973,6 +973,14 @@
>      std::vector<InputSection *> *ISR = KV.first;
>      std::vector<ThunkSection *> &Thunks = KV.second;
>  
> +    // Remove ThunkSections that contain no Thunks
> +    Thunks.erase(
> +        llvm::remove_if(
> +            Thunks, [](const ThunkSection *TS) { return TS->getSize() == 0; }),
> +        Thunks.end());
> +    if (Thunks.empty())
> +      continue;
> +
>      // Order Thunks in ascending OutSecOff
>      auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) {
>        return A->OutSecOff < B->OutSecOff;
> @@ -1000,18 +1008,22 @@
>    }
>  }
>  
> -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS,
> -                                          std::vector<InputSection *> *ISR) {
> -  if (CurTS == nullptr) {
> -    uint32_t Off = 0;
> -    for (auto *IS : OS->Sections) {
> -      Off = IS->OutSecOff + IS->getSize();
> -      if ((IS->Flags & SHF_EXECINSTR) == 0)
> -        break;
> -    }
> -    CurTS = addThunkSection(OS, ISR, Off);
> -  }
> -  return CurTS;
> +// Find or create a ThunkSection within the InputSectionRange (ISR) that is in
> +// range of Src. An ISR maps to a range of InputSections described by a
> +// linker script section pattern such as { .text .text.* }.
> +// FIXME: At present we assume that all ThunkSections are in range so we always
> +// return the first pre-created ThunkSection.
> +ThunkSection *ThunkCreator::getISRThunkSec(OutputSection *OS,
> +                                           std::vector<InputSection *> *ISR) {
> +  // FIXME: When range extension thunks are supported we will need to check
> +  // that the ThunkSection is in range of the caller
> +  if (!ThunkSections[ISR].empty())
> +    return ThunkSections[ISR].front();
> +
> +  // FIXME: When range extension thunks are supported we must handle the case
> +  // where no pre-created ThunkSections are in range by creating a new one in
> +  // range; for now it is unreachable.
> +  llvm_unreachable("Must have created at least one ThunkSection per ISR");
>  }
>  
>  ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) {
> @@ -1024,7 +1036,7 @@
>    OutputSectionCommand *C = Script->getCmd(TOS);
>    std::vector<InputSection *> *Range = nullptr;
>    for (BaseCommand *BC : C->Commands)
> -    if (auto *ISD = dyn_cast<InputSectionDescription> (BC)) {
> +    if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
>        InputSection *first = ISD->Sections.front();
>        InputSection *last = ISD->Sections.back();
>        if (IS->OutSecOff >= first->OutSecOff &&
> @@ -1038,15 +1050,60 @@
>    return TS;
>  }
>  
> +// Create one or more ThunkSections per OS that can be used to place Thunks.
> +// We attempt to place the ThunkSections using the following desirable
> +// properties:
> +// - Within range of the maximum number of callers
> +// - Minimise the number of ThunkSections
> +//
> +// We follow a simple but conservative heuristic to place ThunkSections at
> +// offsets that are multiples of a Target specific branch range.
> +// For an InputSectionRange that is smaller than the branch range, a single
> +// ThunkSection at the end of the InputSectionRange will do.
> +void ThunkCreator::createInitialThunkSections(
> +    ArrayRef<OutputSectionCommand *> OutputSections) {
> +  bool NeedTrailingTS;
> +  uint32_t Off;
> +  uint32_t Limit;
> +  InputSection *PrevIS = nullptr;
> +  std::vector<InputSection *> *PrevISR = nullptr;
> +
> +  forEachExecInputSectionRange(
> +      OutputSections, [&](OutputSection *OS, std::vector<InputSection *> *ISR) {
> +        for (InputSection *IS : *ISR) {
> +          if (ISR != PrevISR) {
> +            NeedTrailingTS = true;
> +            Off = 0;
> +            Limit = IS->OutSecOff +
> +                    (Target->ThunkSectionSpacing - Target->ThunkSectionSize);
> +            PrevIS = nullptr;
> +            PrevISR = ISR;
> +          }
> +          Off = IS->OutSecOff + IS->getSize();
> +          if (Off >= Limit) {
> +            uint32_t ThunkOff = (PrevIS == nullptr)
> +                                    ? IS->OutSecOff
> +                                    : PrevIS->OutSecOff + PrevIS->getSize();
> +            addThunkSection(OS, ISR, ThunkOff);
> +            NeedTrailingTS = false;
> +            Limit = ThunkOff + Target->ThunkSectionSpacing;
> +          }
> +          PrevIS = IS;
> +
> +          if (ISR->back() == IS && NeedTrailingTS)
> +            addThunkSection(OS, ISR, Off);
> +        }
> +      });
> +}
> +
>  ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS,
>                                              std::vector<InputSection *> *ISR,
>                                              uint64_t Off) {
>    auto *TS = make<ThunkSection>(OS, Off);
>    ThunkSections[ISR].push_back(TS);
>    return TS;
>  }
>  
> -
>  std::pair<Thunk *, bool> ThunkCreator::getThunk(SymbolBody &Body,
>                                                  uint32_t Type) {
>    auto Res = ThunkedSymbols.insert({&Body, std::vector<Thunk *>()});
> @@ -1062,22 +1119,18 @@
>    return std::make_pair(T, true);
>  }
>  
> -// Call Fn on every executable InputSection accessed via the linker script
> -// InputSectionDescription::Sections.
> -void ThunkCreator::forEachExecInputSection(
> +// Call Fn on every executable Range of InputSections accessed via the linker
> +// script InputSectionDescription::Sections.
> +void ThunkCreator::forEachExecInputSectionRange(
>      ArrayRef<OutputSectionCommand *> OutputSections,
> -    std::function<void(OutputSection *, std::vector<InputSection *> *,
> -                       InputSection *)>
> -        Fn) {
> +    std::function<void(OutputSection *, std::vector<InputSection *> *)> Fn) {
>    for (OutputSectionCommand *Cmd : OutputSections) {
>      OutputSection *OS = Cmd->Sec;
>      if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR))
>        continue;
>      for (BaseCommand *BC : Cmd->Commands)
>        if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
> -        CurTS = nullptr;
> -        for (InputSection *IS : ISD->Sections)
> -          Fn(OS, &ISD->Sections, IS);
> +        Fn(OS, &ISD->Sections);
>        }
>    }
>  }
> @@ -1096,38 +1149,40 @@
>      ArrayRef<OutputSectionCommand *> OutputSections) {
>    if (Pass > 0)
>      ThunkSections.clear();
> +  else if (Target->ThunkSectionSpacing)
> +    createInitialThunkSections(OutputSections);
>  
>    // Create all the Thunks and insert them into synthetic ThunkSections. The
>    // ThunkSections are later inserted back into the OutputSection.
>  
>    // We separate the creation of ThunkSections from the insertion of the
>    // ThunkSections back into the OutputSection as ThunkSections are not always
>    // inserted into the same OutputSection as the caller.
> -  forEachExecInputSection(
> -      OutputSections, [&](OutputSection *OS,  std::vector<InputSection*> *ISR,
> -                          InputSection *IS) {
> -        for (Relocation &Rel : IS->Relocations) {
> -          SymbolBody &Body = *Rel.Sym;
> -          if (Thunks.find(&Body) != Thunks.end() ||
> -              !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body))
> -            continue;
> -          Thunk *T;
> -          bool IsNew;
> -          std::tie(T, IsNew) = getThunk(Body, Rel.Type);
> -          if (IsNew) {
> -            // Find or create a ThunkSection for the new Thunk
> -            ThunkSection *TS;
> -            if (auto *TIS = T->getTargetInputSection())
> -              TS = getISThunkSec(TIS, OS);
> -            else
> -              TS = getOSThunkSec(OS, ISR);
> -            TS->addThunk(T);
> -            Thunks[T->ThunkSym] = T;
> +  forEachExecInputSectionRange(
> +      OutputSections, [&](OutputSection *OS, std::vector<InputSection *> *ISR) {
> +        for (InputSection *IS : *ISR)
> +          for (Relocation &Rel : IS->Relocations) {
> +            SymbolBody &Body = *Rel.Sym;
> +            if (Thunks.find(&Body) != Thunks.end() ||
> +                !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body))
> +              continue;
> +            Thunk *T;
> +            bool IsNew;
> +            std::tie(T, IsNew) = getThunk(Body, Rel.Type);
> +            if (IsNew) {
> +              // Find or create a ThunkSection for the new Thunk
> +              ThunkSection *TS;
> +              if (auto *TIS = T->getTargetInputSection())
> +                TS = getISThunkSec(TIS, OS);
> +              else
> +                TS = getISRThunkSec(OS, ISR);
> +              TS->addThunk(T);
> +              Thunks[T->ThunkSym] = T;
> +            }
> +            // Redirect relocation to Thunk, we never go via the PLT to a Thunk
> +            Rel.Sym = T->ThunkSym;
> +            Rel.Expr = fromPlt(Rel.Expr);
>            }
> -          // Redirect relocation to Thunk, we never go via the PLT to a Thunk
> -          Rel.Sym = T->ThunkSym;
> -          Rel.Expr = fromPlt(Rel.Expr);
> -        }
>        });
>    // Merge all created synthetic ThunkSections back into OutputSection
>    mergeThunks();
> Index: ELF/Arch/ARM.cpp
> ===================================================================
> --- ELF/Arch/ARM.cpp
> +++ ELF/Arch/ARM.cpp
> @@ -61,6 +61,22 @@
>    // ARM uses Variant 1 TLS
>    TcbSize = 8;
>    NeedsThunks = true;
> +  // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is
> +  // to match the most common expected case of a Thumb 2 encoded BL, BLX or B.W
> +  // ARM B, BL, BLX range 32MiB
> +  // Thumb B.W, BL, BLX range 16MiB
> +  // Thumb B<cc>.W range 1MiB
> +  // If branch cannot reach a pre-created ThunkSection a new one will be created
> +  // so we can handle the rare case of Thumb 2 conditional branch.
> +  // FIXME: lld assumes a CPU with support for ARMv6T2 and above encodings.
> +  // If support is added for pre-ARMv6T2 CPUs then, when in use, this spacing
> +  // should drop to 4MiB.
> +  ThunkSectionSpacing = 0x1000000;
> +  // The pre-created ThunkSections are inserted such that the end of the
> +  // pre-created ThunkSection is almost certain to be within range of a branch
> +  // from the start of the Section, or immediately following the previous
> +  // ThunkSection. Allow for 16384 12 byte Thunks per ThunkSectionSpacing
> +  ThunkSectionSize = 0x30000;
>  }
>  
>  RelExpr ARM::getRelExpr(uint32_t Type, const SymbolBody &S,


More information about the llvm-commits mailing list