[PATCH] D34689: [LLD][ELF] Pre-create ThunkSections at Target specific intervals

Peter Smith via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 17 04:09:50 PDT 2017


It is pretty much as you suggest: this is simple to implement and
works well enough. My suggestion is to start simple and, if there is a
need for a more optimal placement, add it later.

On 14 July 2017 at 19:49, Rafael Avila de Espindola
<rafael.espindola at gmail.com> wrote:
> Why is this based on output sections?
>
> If the idea is to have regularly spaced sections, it seems it should look
> at executable PT_LOADs, find the offsets where the sections should be
> and only then walk the input sections trying to figure out where to put
> them.
>
> If it is because it is easier to implement like this and a sufficiently
> good heuristic, that is fine :-)
>
> Cheers,
> Rafael
>
> Peter Smith via Phabricator <reviews at reviews.llvm.org> writes:
>
>> peter.smith updated this revision to Diff 106435.
>> peter.smith added a comment.
>>
>> Updated diff to return nullptr from ThunkSection::getTargetInputSection() when there are no Thunks.
>>
>>
>> https://reviews.llvm.org/D34689
>>
>> Files:
>>   ELF/Arch/ARM.cpp
>>   ELF/Relocations.cpp
>>   ELF/Relocations.h
>>   ELF/SyntheticSections.cpp
>>   ELF/Target.h
>>   test/ELF/arm-thumb-thunk-symbols.s
>>
>> Index: test/ELF/arm-thumb-thunk-symbols.s
>> ===================================================================
>> --- test/ELF/arm-thumb-thunk-symbols.s
>> +++ test/ELF/arm-thumb-thunk-symbols.s
>> @@ -25,18 +25,18 @@
>>   b thumb_fn
>>
>>  // CHECK:     Name: __Thumbv7ABSLongThunk_arm_fn
>> -// CHECK-NEXT:     Value: 0x11005
>> +// CHECK-NEXT:     Value: 0x12005
>>  // CHECK-NEXT:     Size: 10
>>  // CHECK-NEXT:    Binding: Local (0x0)
>>  // CHECK-NEXT:    Type: Function (0x2)
>>  // CHECK:     Name: __ARMv7ABSLongThunk_thumb_fn
>> -// CHECK-NEXT:     Value: 0x11010
>> +// CHECK-NEXT:     Value: 0x12010
>>  // CHECK-NEXT:     Size: 12
>>  // CHECK-NEXT:    Binding: Local (0x0)
>>  // CHECK-NEXT:    Type: Function (0x2)
>>
>>  // CHECK-PI:     Name: __ThumbV7PILongThunk_arm_fn
>> -// CHECK-PI-NEXT:     Value: 0x1005
>> +// CHECK-PI-NEXT:     Value: 0x2005
>>  // CHECK-PI-NEXT:     Size: 12
>>  // CHECK-PI-NEXT:    Binding: Local (0x0)
>>  // CHECK-PI-NEXT:    Type: Function (0x2)
>> Index: ELF/Target.h
>> ===================================================================
>> --- ELF/Target.h
>> +++ ELF/Target.h
>> @@ -70,6 +70,13 @@
>>    // end of .got
>>    uint64_t GotBaseSymOff = 0;
>>
>> +  // On systems with range extensions we place collections of Thunks at
>> +  // regular spacings that enable the majority of branches to reach the Thunks.
>> +  uint32_t ThunkSectionSpacing = 0;
>> +
>> +  // An estimate of the size of the Thunks that will be created per ThunkSection
>> +  uint32_t ThunkSectionSize = 0;
>> +
>>    uint32_t CopyRel;
>>    uint32_t GotRel;
>>    uint32_t PltRel;
>> Index: ELF/SyntheticSections.cpp
>> ===================================================================
>> --- ELF/SyntheticSections.cpp
>> +++ ELF/SyntheticSections.cpp
>> @@ -2325,6 +2325,8 @@
>>  }
>>
>>  InputSection *ThunkSection::getTargetInputSection() const {
>> +  if (Thunks.empty())
>> +    return nullptr;
>>    const Thunk *T = Thunks.front();
>>    return T->getTargetInputSection();
>>  }
>> Index: ELF/Relocations.h
>> ===================================================================
>> --- ELF/Relocations.h
>> +++ ELF/Relocations.h
>> @@ -133,14 +133,14 @@
>>
>>  private:
>>    void mergeThunks();
>> -  ThunkSection *getOSThunkSec(OutputSection *OS,
>> -                              std::vector<InputSection *> *ISR);
>> +  ThunkSection *getISRThunkSec(OutputSection *OS,
>> +                               std::vector<InputSection *> *ISR);
>>    ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS);
>> -  void forEachExecInputSection(
>> +  void
>> +  createInitialThunkSections(ArrayRef<OutputSectionCommand *> OutputSections);
>> +  void forEachExecInputSectionRange(
>>        ArrayRef<OutputSectionCommand *> OutputSections,
>> -      std::function<void(OutputSection *, std::vector<InputSection *> *,
>> -                         InputSection *)>
>> -          Fn);
>> +      std::function<void(OutputSection *, std::vector<InputSection *> *)> Fn);
>>    std::pair<Thunk *, bool> getThunk(SymbolBody &Body, uint32_t Type);
>>    ThunkSection *addThunkSection(OutputSection *OS,
>>                                  std::vector<InputSection *> *, uint64_t Off);
>> @@ -163,9 +163,6 @@
>>    // passes
>>    std::map<std::vector<InputSection *> *, std::vector<ThunkSection *>>
>>        ThunkSections;
>> -
>> -  // The ThunkSection for this vector of InputSections
>> -  ThunkSection *CurTS;
>>  };
>>
>>  // Return a int64_t to make sure we get the sign extension out of the way as
>> Index: ELF/Relocations.cpp
>> ===================================================================
>> --- ELF/Relocations.cpp
>> +++ ELF/Relocations.cpp
>> @@ -973,6 +973,14 @@
>>      std::vector<InputSection *> *ISR = KV.first;
>>      std::vector<ThunkSection *> &Thunks = KV.second;
>>
>> +    // Remove ThunkSections that contain no Thunks
>> +    Thunks.erase(
>> +        llvm::remove_if(
>> +            Thunks, [](const ThunkSection *TS) { return TS->getSize() == 0; }),
>> +        Thunks.end());
>> +    if (Thunks.empty())
>> +      continue;
>> +
>>      // Order Thunks in ascending OutSecOff
>>      auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) {
>>        return A->OutSecOff < B->OutSecOff;
>> @@ -1000,18 +1008,22 @@
>>    }
>>  }
>>
>> -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS,
>> -                                          std::vector<InputSection *> *ISR) {
>> -  if (CurTS == nullptr) {
>> -    uint32_t Off = 0;
>> -    for (auto *IS : OS->Sections) {
>> -      Off = IS->OutSecOff + IS->getSize();
>> -      if ((IS->Flags & SHF_EXECINSTR) == 0)
>> -        break;
>> -    }
>> -    CurTS = addThunkSection(OS, ISR, Off);
>> -  }
>> -  return CurTS;
>> +// Find or create a ThunkSection within the InputSectionRange (ISR) that is in
>> +// range of Src. An ISR maps to a range of InputSections described by a
>> +// linker script section pattern such as { .text .text.* }.
>> +// FIXME: At present we assume that all ThunkSections are in range so we always
>> +// return the first pre-created ThunkSection.
>> +ThunkSection *ThunkCreator::getISRThunkSec(OutputSection *OS,
>> +                                           std::vector<InputSection *> *ISR) {
>> +  // FIXME: When range extension thunks are supported we will need to check
>> +  // that the ThunkSection is in range of the caller
>> +  if (!ThunkSections[ISR].empty())
>> +    return ThunkSections[ISR].front();
>> +
>> +  // FIXME: When range extension thunks are supported we must handle the case
>> +  // where no pre-created ThunkSections are in range by creating a new one in
>> +  // range. For now it is unreachable.
>> +  llvm_unreachable("Must have created at least one ThunkSection per ISR");
>>  }
>>
>>  ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) {
>> @@ -1024,7 +1036,7 @@
>>    OutputSectionCommand *C = Script->getCmd(TOS);
>>    std::vector<InputSection *> *Range = nullptr;
>>    for (BaseCommand *BC : C->Commands)
>> -    if (auto *ISD = dyn_cast<InputSectionDescription> (BC)) {
>> +    if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
>>        InputSection *first = ISD->Sections.front();
>>        InputSection *last = ISD->Sections.back();
>>        if (IS->OutSecOff >= first->OutSecOff &&
>> @@ -1038,15 +1050,60 @@
>>    return TS;
>>  }
>>
>> +// Create one or more ThunkSections per OS that can be used to place Thunks.
>> +// We attempt to place the ThunkSections using the following desirable
>> +// properties:
>> +// - Within range of the maximum number of callers
>> +// - Minimise the number of ThunkSections
>> +//
>> +// We follow a simple but conservative heuristic to place ThunkSections at
>> +// offsets that are multiples of a Target specific branch range.
>> +// For an InputSectionRange that is smaller than the branch range, a single
>> +// ThunkSection at the end of the InputSectionRange will do.
>> +void ThunkCreator::createInitialThunkSections(
>> +    ArrayRef<OutputSectionCommand *> OutputSections) {
>> +  bool NeedTrailingTS;
>> +  uint32_t Off;
>> +  uint32_t Limit;
>> +  InputSection *PrevIS = nullptr;
>> +  std::vector<InputSection *> *PrevISR = nullptr;
>> +
>> +  forEachExecInputSectionRange(
>> +      OutputSections, [&](OutputSection *OS, std::vector<InputSection *> *ISR) {
>> +        for (InputSection *IS : *ISR) {
>> +          if (ISR != PrevISR) {
>> +            NeedTrailingTS = true;
>> +            Off = 0;
>> +            Limit = IS->OutSecOff +
>> +                    (Target->ThunkSectionSpacing - Target->ThunkSectionSize);
>> +            PrevIS = nullptr;
>> +            PrevISR = ISR;
>> +          }
>> +          Off = IS->OutSecOff + IS->getSize();
>> +          if (Off >= Limit) {
>> +            uint32_t ThunkOff = (PrevIS == nullptr)
>> +                                    ? IS->OutSecOff
>> +                                    : PrevIS->OutSecOff + PrevIS->getSize();
>> +            addThunkSection(OS, ISR, ThunkOff);
>> +            NeedTrailingTS = false;
>> +            Limit = ThunkOff + Target->ThunkSectionSpacing;
>> +          }
>> +          PrevIS = IS;
>> +
>> +          if (ISR->back() == IS && NeedTrailingTS)
>> +            addThunkSection(OS, ISR, Off);
>> +        }
>> +      });
>> +}
>> +
>>  ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS,
>>                                              std::vector<InputSection *> *ISR,
>>                                              uint64_t Off) {
>>    auto *TS = make<ThunkSection>(OS, Off);
>>    ThunkSections[ISR].push_back(TS);
>>    return TS;
>>  }
>>
>> -
>>  std::pair<Thunk *, bool> ThunkCreator::getThunk(SymbolBody &Body,
>>                                                  uint32_t Type) {
>>    auto Res = ThunkedSymbols.insert({&Body, std::vector<Thunk *>()});
>> @@ -1062,22 +1119,18 @@
>>    return std::make_pair(T, true);
>>  }
>>
>> -// Call Fn on every executable InputSection accessed via the linker script
>> -// InputSectionDescription::Sections.
>> -void ThunkCreator::forEachExecInputSection(
>> +// Call Fn on every executable Range of InputSections accessed via the linker
>> +// script InputSectionDescription::Sections.
>> +void ThunkCreator::forEachExecInputSectionRange(
>>      ArrayRef<OutputSectionCommand *> OutputSections,
>> -    std::function<void(OutputSection *, std::vector<InputSection *> *,
>> -                       InputSection *)>
>> -        Fn) {
>> +    std::function<void(OutputSection *, std::vector<InputSection *> *)> Fn) {
>>    for (OutputSectionCommand *Cmd : OutputSections) {
>>      OutputSection *OS = Cmd->Sec;
>>      if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR))
>>        continue;
>>      for (BaseCommand *BC : Cmd->Commands)
>>        if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
>> -        CurTS = nullptr;
>> -        for (InputSection *IS : ISD->Sections)
>> -          Fn(OS, &ISD->Sections, IS);
>> +        Fn(OS, &ISD->Sections);
>>        }
>>    }
>>  }
>> @@ -1096,38 +1149,40 @@
>>      ArrayRef<OutputSectionCommand *> OutputSections) {
>>    if (Pass > 0)
>>      ThunkSections.clear();
>> +  else if (Target->ThunkSectionSpacing)
>> +    createInitialThunkSections(OutputSections);
>>
>>    // Create all the Thunks and insert them into synthetic ThunkSections. The
>>    // ThunkSections are later inserted back into the OutputSection.
>>
>>    // We separate the creation of ThunkSections from the insertion of the
>>    // ThunkSections back into the OutputSection as ThunkSections are not always
>>    // inserted into the same OutputSection as the caller.
>> -  forEachExecInputSection(
>> -      OutputSections, [&](OutputSection *OS,  std::vector<InputSection*> *ISR,
>> -                          InputSection *IS) {
>> -        for (Relocation &Rel : IS->Relocations) {
>> -          SymbolBody &Body = *Rel.Sym;
>> -          if (Thunks.find(&Body) != Thunks.end() ||
>> -              !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body))
>> -            continue;
>> -          Thunk *T;
>> -          bool IsNew;
>> -          std::tie(T, IsNew) = getThunk(Body, Rel.Type);
>> -          if (IsNew) {
>> -            // Find or create a ThunkSection for the new Thunk
>> -            ThunkSection *TS;
>> -            if (auto *TIS = T->getTargetInputSection())
>> -              TS = getISThunkSec(TIS, OS);
>> -            else
>> -              TS = getOSThunkSec(OS, ISR);
>> -            TS->addThunk(T);
>> -            Thunks[T->ThunkSym] = T;
>> +  forEachExecInputSectionRange(
>> +      OutputSections, [&](OutputSection *OS, std::vector<InputSection *> *ISR) {
>> +        for (InputSection *IS : *ISR)
>> +          for (Relocation &Rel : IS->Relocations) {
>> +            SymbolBody &Body = *Rel.Sym;
>> +            if (Thunks.find(&Body) != Thunks.end() ||
>> +                !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body))
>> +              continue;
>> +            Thunk *T;
>> +            bool IsNew;
>> +            std::tie(T, IsNew) = getThunk(Body, Rel.Type);
>> +            if (IsNew) {
>> +              // Find or create a ThunkSection for the new Thunk
>> +              ThunkSection *TS;
>> +              if (auto *TIS = T->getTargetInputSection())
>> +                TS = getISThunkSec(TIS, OS);
>> +              else
>> +                TS = getISRThunkSec(OS, ISR);
>> +              TS->addThunk(T);
>> +              Thunks[T->ThunkSym] = T;
>> +            }
>> +            // Redirect relocation to Thunk, we never go via the PLT to a Thunk
>> +            Rel.Sym = T->ThunkSym;
>> +            Rel.Expr = fromPlt(Rel.Expr);
>>            }
>> -          // Redirect relocation to Thunk, we never go via the PLT to a Thunk
>> -          Rel.Sym = T->ThunkSym;
>> -          Rel.Expr = fromPlt(Rel.Expr);
>> -        }
>>        });
>>    // Merge all created synthetic ThunkSections back into OutputSection
>>    mergeThunks();
>> Index: ELF/Arch/ARM.cpp
>> ===================================================================
>> --- ELF/Arch/ARM.cpp
>> +++ ELF/Arch/ARM.cpp
>> @@ -61,6 +61,22 @@
>>    // ARM uses Variant 1 TLS
>>    TcbSize = 8;
>>    NeedsThunks = true;
>> +  // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to
>> +  // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W
>> +  // ARM B, BL, BLX range 32MiB
>> +  // Thumb B.W, BL, BLX range 16MiB
>> +  // Thumb B<cc>.W range 1MiB
>> +  // If a branch cannot reach a pre-created ThunkSection a new one will be
>> +  // created so we can handle the rare case of a Thumb 2 conditional branch.
>> +  // FIXME: lld assumes a CPU with support for ARMv6T2 and above encodings.
>> +  // If support is added for architectures without the ARMv6T2 encodings then,
>> +  // when in use, this spacing should drop to 4MiB
>> +  ThunkSectionSpacing = 0x1000000;
>> +  // The pre-created ThunkSections are inserted such that the end of the
>> +  // pre-created ThunkSection is almost certain to be within range of a branch
>> +  // from the start of the Section, or immediately following the previous
>> +  // ThunkSection. Allow for 16384 12 byte Thunks per ThunkSectionSpacing
>> +  ThunkSectionSize = 0x30000;
>>  }
>>
>>  RelExpr ARM::getRelExpr(uint32_t Type, const SymbolBody &S,


More information about the llvm-commits mailing list