[PATCH] D34689: [LLD][ELF] Pre-create ThunkSections at Target specific intervals
Peter Smith via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 17 04:09:50 PDT 2017
It is pretty much as you suggest: this is simple to implement and
works well enough. My suggestion is to start simple and, if there is a
need for a more optimal placement, add it later.
On 14 July 2017 at 19:49, Rafael Avila de Espindola
<rafael.espindola at gmail.com> wrote:
> Why is this based on output sections?
>
> If the idea is to have regularly spaced sections, it seems it should look
> at executable PT_LOADs, find the offsets where the sections should be
> and only then walk the input sections trying to figure out where to put
> them.
>
> If it is because it is easier to implement like this and a sufficiently
> good heuristic, that is fine :-)
>
> Cheers,
> Rafael
>
> Peter Smith via Phabricator <reviews at reviews.llvm.org> writes:
>
>> peter.smith updated this revision to Diff 106435.
>> peter.smith added a comment.
>>
>> Updated diff to return nullptr from ThunkSection::getTargetInputSection() when there are no Thunks.
>>
>>
>> https://reviews.llvm.org/D34689
>>
>> Files:
>> ELF/Arch/ARM.cpp
>> ELF/Relocations.cpp
>> ELF/Relocations.h
>> ELF/SyntheticSections.cpp
>> ELF/Target.h
>> test/ELF/arm-thumb-thunk-symbols.s
>>
>> Index: test/ELF/arm-thumb-thunk-symbols.s
>> ===================================================================
>> --- test/ELF/arm-thumb-thunk-symbols.s
>> +++ test/ELF/arm-thumb-thunk-symbols.s
>> @@ -25,18 +25,18 @@
>> b thumb_fn
>>
>> // CHECK: Name: __Thumbv7ABSLongThunk_arm_fn
>> -// CHECK-NEXT: Value: 0x11005
>> +// CHECK-NEXT: Value: 0x12005
>> // CHECK-NEXT: Size: 10
>> // CHECK-NEXT: Binding: Local (0x0)
>> // CHECK-NEXT: Type: Function (0x2)
>> // CHECK: Name: __ARMv7ABSLongThunk_thumb_fn
>> -// CHECK-NEXT: Value: 0x11010
>> +// CHECK-NEXT: Value: 0x12010
>> // CHECK-NEXT: Size: 12
>> // CHECK-NEXT: Binding: Local (0x0)
>> // CHECK-NEXT: Type: Function (0x2)
>>
>> // CHECK-PI: Name: __ThumbV7PILongThunk_arm_fn
>> -// CHECK-PI-NEXT: Value: 0x1005
>> +// CHECK-PI-NEXT: Value: 0x2005
>> // CHECK-PI-NEXT: Size: 12
>> // CHECK-PI-NEXT: Binding: Local (0x0)
>> // CHECK-PI-NEXT: Type: Function (0x2)
>> Index: ELF/Target.h
>> ===================================================================
>> --- ELF/Target.h
>> +++ ELF/Target.h
>> @@ -70,6 +70,13 @@
>> // end of .got
>> uint64_t GotBaseSymOff = 0;
>>
>> + // On systems with range extensions we place collections of Thunks at
>> + // regular spacings that enable the majority of branches to reach the Thunks.
>> + uint32_t ThunkSectionSpacing = 0;
>> +
>> + // An estimate of size of the Thunks that will be created per ThunkSection
>> + uint32_t ThunkSectionSize = 0;
>> +
>> uint32_t CopyRel;
>> uint32_t GotRel;
>> uint32_t PltRel;
>> Index: ELF/SyntheticSections.cpp
>> ===================================================================
>> --- ELF/SyntheticSections.cpp
>> +++ ELF/SyntheticSections.cpp
>> @@ -2325,6 +2325,8 @@
>> }
>>
>> InputSection *ThunkSection::getTargetInputSection() const {
>> + if (Thunks.empty())
>> + return nullptr;
>> const Thunk *T = Thunks.front();
>> return T->getTargetInputSection();
>> }
>> Index: ELF/Relocations.h
>> ===================================================================
>> --- ELF/Relocations.h
>> +++ ELF/Relocations.h
>> @@ -133,14 +133,14 @@
>>
>> private:
>> void mergeThunks();
>> - ThunkSection *getOSThunkSec(OutputSection *OS,
>> - std::vector<InputSection *> *ISR);
>> + ThunkSection *getISRThunkSec(OutputSection *OS,
>> + std::vector<InputSection *> *ISR);
>> ThunkSection *getISThunkSec(InputSection *IS, OutputSection *OS);
>> - void forEachExecInputSection(
>> + void
>> + createInitialThunkSections(ArrayRef<OutputSectionCommand *> OutputSections);
>> + void forEachExecInputSectionRange(
>> ArrayRef<OutputSectionCommand *> OutputSections,
>> - std::function<void(OutputSection *, std::vector<InputSection *> *,
>> - InputSection *)>
>> - Fn);
>> + std::function<void(OutputSection *, std::vector<InputSection *> *)> Fn);
>> std::pair<Thunk *, bool> getThunk(SymbolBody &Body, uint32_t Type);
>> ThunkSection *addThunkSection(OutputSection *OS,
>> std::vector<InputSection *> *, uint64_t Off);
>> @@ -163,9 +163,6 @@
>> // passes
>> std::map<std::vector<InputSection *> *, std::vector<ThunkSection *>>
>> ThunkSections;
>> -
>> - // The ThunkSection for this vector of InputSections
>> - ThunkSection *CurTS;
>> };
>>
>> // Return a int64_t to make sure we get the sign extension out of the way as
>> Index: ELF/Relocations.cpp
>> ===================================================================
>> --- ELF/Relocations.cpp
>> +++ ELF/Relocations.cpp
>> @@ -973,6 +973,14 @@
>> std::vector<InputSection *> *ISR = KV.first;
>> std::vector<ThunkSection *> &Thunks = KV.second;
>>
>> + // Remove ThunkSections that contain no Thunks
>> + Thunks.erase(
>> + llvm::remove_if(
>> + Thunks, [](const ThunkSection *TS) { return TS->getSize() == 0; }),
>> + Thunks.end());
>> + if (Thunks.empty())
>> + continue;
>> +
>> // Order Thunks in ascending OutSecOff
>> auto ThunkCmp = [](const ThunkSection *A, const ThunkSection *B) {
>> return A->OutSecOff < B->OutSecOff;
>> @@ -1000,18 +1008,22 @@
>> }
>> }
>>
>> -ThunkSection *ThunkCreator::getOSThunkSec(OutputSection *OS,
>> - std::vector<InputSection *> *ISR) {
>> - if (CurTS == nullptr) {
>> - uint32_t Off = 0;
>> - for (auto *IS : OS->Sections) {
>> - Off = IS->OutSecOff + IS->getSize();
>> - if ((IS->Flags & SHF_EXECINSTR) == 0)
>> - break;
>> - }
>> - CurTS = addThunkSection(OS, ISR, Off);
>> - }
>> - return CurTS;
>> +// Find or create a ThunkSection within the InputSectionRange (ISR) that is in
>> +// range of Src. An ISR maps to a range of InputSections described by a
>> +// linker script section pattern such as { .text .text.* }.
>> +// FIXME: At present we assume that all ThunkSections are in range so we always
>> +// return the first pre-created ThunkSection.
>> +ThunkSection *ThunkCreator::getISRThunkSec(OutputSection *OS,
>> + std::vector<InputSection *> *ISR) {
>> + // FIXME: When range extension thunks are supported we will need to check
>> + // that the ThunkSection is in range of the caller
>> + if (!ThunkSections[ISR].empty())
>> + return ThunkSections[ISR].front();
>> +
>> + // FIXME: When range extension thunks are supported we must handle the case
>> + // where no pre-created ThunkSections are in range by creating a new one in
>> + // range. For now this is unreachable.
>> + llvm_unreachable("Must have created at least one ThunkSection per ISR");
>> }
>>
>> ThunkSection *ThunkCreator::getISThunkSec(InputSection *IS, OutputSection *OS) {
>> @@ -1024,7 +1036,7 @@
>> OutputSectionCommand *C = Script->getCmd(TOS);
>> std::vector<InputSection *> *Range = nullptr;
>> for (BaseCommand *BC : C->Commands)
>> - if (auto *ISD = dyn_cast<InputSectionDescription> (BC)) {
>> + if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
>> InputSection *first = ISD->Sections.front();
>> InputSection *last = ISD->Sections.back();
>> if (IS->OutSecOff >= first->OutSecOff &&
>> @@ -1038,15 +1050,60 @@
>> return TS;
>> }
>>
>> +// Create one or more ThunkSections per OS that can be used to place Thunks.
>> +// We attempt to place the ThunkSections using the following desirable
>> +// properties:
>> +// - Within range of the maximum number of callers
>> +// - Minimise the number of ThunkSections
>> +//
>> +// We follow a simple but conservative heuristic to place ThunkSections at
>> +// offsets that are multiples of a Target specific branch range.
>> +// For an InputSectionRange that is smaller than the branch range, a single
>> +// ThunkSection at the end of the range will do.
>> +void ThunkCreator::createInitialThunkSections(
>> + ArrayRef<OutputSectionCommand *> OutputSections) {
>> + bool NeedTrailingTS;
>> + uint32_t Off;
>> + uint32_t Limit;
>> + InputSection *PrevIS = nullptr;
>> + std::vector<InputSection *> *PrevISR = nullptr;
>> +
>> + forEachExecInputSectionRange(
>> + OutputSections, [&](OutputSection *OS, std::vector<InputSection *> *ISR) {
>> + for (InputSection *IS : *ISR) {
>> + if (ISR != PrevISR) {
>> + NeedTrailingTS = true;
>> + Off = 0;
>> + Limit = IS->OutSecOff +
>> + (Target->ThunkSectionSpacing - Target->ThunkSectionSize);
>> + PrevIS = nullptr;
>> + PrevISR = ISR;
>> + }
>> + Off = IS->OutSecOff + IS->getSize();
>> + if (Off >= Limit) {
>> + uint32_t ThunkOff = (PrevIS == nullptr)
>> + ? IS->OutSecOff
>> + : PrevIS->OutSecOff + PrevIS->getSize();
>> + addThunkSection(OS, ISR, ThunkOff);
>> + NeedTrailingTS = false;
>> + Limit = ThunkOff + Target->ThunkSectionSpacing;
>> + }
>> + PrevIS = IS;
>> +
>> + if (ISR->back() == IS && NeedTrailingTS)
>> + addThunkSection(OS, ISR, Off);
>> + }
>> + });
>> +}
>> +
>> ThunkSection *ThunkCreator::addThunkSection(OutputSection *OS,
>> std::vector<InputSection *> *ISR,
>> uint64_t Off) {
>> auto *TS = make<ThunkSection>(OS, Off);
>> ThunkSections[ISR].push_back(TS);
>> return TS;
>> }
>>
>> -
>> std::pair<Thunk *, bool> ThunkCreator::getThunk(SymbolBody &Body,
>> uint32_t Type) {
>> auto Res = ThunkedSymbols.insert({&Body, std::vector<Thunk *>()});
>> @@ -1062,22 +1119,18 @@
>> return std::make_pair(T, true);
>> }
>>
>> -// Call Fn on every executable InputSection accessed via the linker script
>> -// InputSectionDescription::Sections.
>> -void ThunkCreator::forEachExecInputSection(
>> +// Call Fn on every executable Range of InputSections accessed via the linker
>> +// script InputSectionDescription::Sections.
>> +void ThunkCreator::forEachExecInputSectionRange(
>> ArrayRef<OutputSectionCommand *> OutputSections,
>> - std::function<void(OutputSection *, std::vector<InputSection *> *,
>> - InputSection *)>
>> - Fn) {
>> + std::function<void(OutputSection *, std::vector<InputSection *> *)> Fn) {
>> for (OutputSectionCommand *Cmd : OutputSections) {
>> OutputSection *OS = Cmd->Sec;
>> if (!(OS->Flags & SHF_ALLOC) || !(OS->Flags & SHF_EXECINSTR))
>> continue;
>> for (BaseCommand *BC : Cmd->Commands)
>> if (auto *ISD = dyn_cast<InputSectionDescription>(BC)) {
>> - CurTS = nullptr;
>> - for (InputSection *IS : ISD->Sections)
>> - Fn(OS, &ISD->Sections, IS);
>> + Fn(OS, &ISD->Sections);
>> }
>> }
>> }
>> @@ -1096,38 +1149,40 @@
>> ArrayRef<OutputSectionCommand *> OutputSections) {
>> if (Pass > 0)
>> ThunkSections.clear();
>> + else if (Target->ThunkSectionSpacing)
>> + createInitialThunkSections(OutputSections);
>>
>> // Create all the Thunks and insert them into synthetic ThunkSections. The
>> // ThunkSections are later inserted back into the OutputSection.
>>
>> // We separate the creation of ThunkSections from the insertion of the
>> // ThunkSections back into the OutputSection as ThunkSections are not always
>> // inserted into the same OutputSection as the caller.
>> - forEachExecInputSection(
>> - OutputSections, [&](OutputSection *OS, std::vector<InputSection*> *ISR,
>> - InputSection *IS) {
>> - for (Relocation &Rel : IS->Relocations) {
>> - SymbolBody &Body = *Rel.Sym;
>> - if (Thunks.find(&Body) != Thunks.end() ||
>> - !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body))
>> - continue;
>> - Thunk *T;
>> - bool IsNew;
>> - std::tie(T, IsNew) = getThunk(Body, Rel.Type);
>> - if (IsNew) {
>> - // Find or create a ThunkSection for the new Thunk
>> - ThunkSection *TS;
>> - if (auto *TIS = T->getTargetInputSection())
>> - TS = getISThunkSec(TIS, OS);
>> - else
>> - TS = getOSThunkSec(OS, ISR);
>> - TS->addThunk(T);
>> - Thunks[T->ThunkSym] = T;
>> + forEachExecInputSectionRange(
>> + OutputSections, [&](OutputSection *OS, std::vector<InputSection *> *ISR) {
>> + for (InputSection *IS : *ISR)
>> + for (Relocation &Rel : IS->Relocations) {
>> + SymbolBody &Body = *Rel.Sym;
>> + if (Thunks.find(&Body) != Thunks.end() ||
>> + !Target->needsThunk(Rel.Expr, Rel.Type, IS->File, Body))
>> + continue;
>> + Thunk *T;
>> + bool IsNew;
>> + std::tie(T, IsNew) = getThunk(Body, Rel.Type);
>> + if (IsNew) {
>> + // Find or create a ThunkSection for the new Thunk
>> + ThunkSection *TS;
>> + if (auto *TIS = T->getTargetInputSection())
>> + TS = getISThunkSec(TIS, OS);
>> + else
>> + TS = getISRThunkSec(OS, ISR);
>> + TS->addThunk(T);
>> + Thunks[T->ThunkSym] = T;
>> + }
>> + // Redirect relocation to Thunk, we never go via the PLT to a Thunk
>> + Rel.Sym = T->ThunkSym;
>> + Rel.Expr = fromPlt(Rel.Expr);
>> }
>> - // Redirect relocation to Thunk, we never go via the PLT to a Thunk
>> - Rel.Sym = T->ThunkSym;
>> - Rel.Expr = fromPlt(Rel.Expr);
>> - }
>> });
>> // Merge all created synthetic ThunkSections back into OutputSection
>> mergeThunks();
>> Index: ELF/Arch/ARM.cpp
>> ===================================================================
>> --- ELF/Arch/ARM.cpp
>> +++ ELF/Arch/ARM.cpp
>> @@ -61,6 +61,22 @@
>> // ARM uses Variant 1 TLS
>> TcbSize = 8;
>> NeedsThunks = true;
>> + // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to
>> + // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W
>> + // ARM B, BL, BLX range 32MiB
>> + // Thumb B.W, BL, BLX range 16MiB
>> + // Thumb B<cc>.W range 1MiB
>> + // If branch cannot reach a pre-created ThunkSection a new one will be created
>> + // so we can handle the rare case of Thumb 2 conditional branch.
>> + // FIXME: lld assumes a CPU with support for ARMv6T2 and above encodings.
>> + // If support is added for pre-ARMv6T2 encodings then when in use this
>> + // spacing should drop to 4MiB
>> + ThunkSectionSpacing = 0x1000000;
>> + // The pre-created ThunkSections are inserted such that the end of the
>> + // pre-created ThunkSection is almost certain to be within range of a branch
>> + // from the start of the Section, or immediately following the previous
>> + // ThunkSection. Allow for 16384 12 byte Thunks per ThunkSectionSpacing
>> + ThunkSectionSize = 0x30000;
>> }
>>
>> RelExpr ARM::getRelExpr(uint32_t Type, const SymbolBody &S,
More information about the llvm-commits
mailing list