[lld] r296570 - Partially rewrite .gnu.hash synthetic section and add comments.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 28 18:51:42 PST 2017
Author: ruiu
Date: Tue Feb 28 20:51:42 2017
New Revision: 296570
URL: http://llvm.org/viewvc/llvm-project?rev=296570&view=rev
Log:
Partially rewrite .gnu.hash synthetic section and add comments.
This implementation is probably slightly inefficient than before,
but that should be negligible because this is not a performance-
critical pass.
Modified:
lld/trunk/ELF/SyntheticSections.cpp
lld/trunk/ELF/SyntheticSections.h
Modified: lld/trunk/ELF/SyntheticSections.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.cpp?rev=296570&r1=296569&r2=296570&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.cpp (original)
+++ lld/trunk/ELF/SyntheticSections.cpp Tue Feb 28 20:51:42 2017
@@ -1450,87 +1450,49 @@ GnuHashTableSection<ELFT>::GnuHashTableS
this->Entsize = ELFT::Is64Bits ? 0 : 4;
}
-template <class ELFT>
-unsigned GnuHashTableSection<ELFT>::calcNBuckets(unsigned NumHashed) {
- if (!NumHashed)
- return 0;
-
- // These values are prime numbers which are not greater than 2^(N-1) + 1.
- // In result, for any particular NumHashed we return a prime number
- // which is not greater than NumHashed.
- static const unsigned Primes[] = {
- 1, 1, 3, 3, 7, 13, 31, 61, 127, 251,
- 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071};
-
- return Primes[std::min<unsigned>(Log2_32_Ceil(NumHashed),
- array_lengthof(Primes) - 1)];
-}
-
-// Bloom filter estimation: at least 8 bits for each hashed symbol.
-// GNU Hash table requirement: it should be a power of 2,
-// the minimum value is 1, even for an empty table.
-// Expected results for a 32-bit target:
-// calcMaskWords(0..4) = 1
-// calcMaskWords(5..8) = 2
-// calcMaskWords(9..16) = 4
-// For a 64-bit target:
-// calcMaskWords(0..8) = 1
-// calcMaskWords(9..16) = 2
-// calcMaskWords(17..32) = 4
-template <class ELFT>
-unsigned GnuHashTableSection<ELFT>::calcMaskWords(unsigned NumHashed) {
- if (!NumHashed)
- return 1;
- return NextPowerOf2((NumHashed - 1) / sizeof(uintX_t));
-}
-
template <class ELFT> void GnuHashTableSection<ELFT>::finalizeContents() {
- unsigned NumHashed = Symbols.size();
- NBuckets = calcNBuckets(NumHashed);
- MaskWords = calcMaskWords(NumHashed);
- // Second hash shift estimation: just predefined values.
- Shift2 = ELFT::Is64Bits ? 6 : 5;
-
this->OutSec->Entsize = this->Entsize;
this->OutSec->Link = In<ELFT>::DynSymTab->OutSec->SectionIndex;
- this->Size = sizeof(uint32_t) * 4 // Header
- + sizeof(uintX_t) * MaskWords // Bloom Filter
- + sizeof(uint32_t) * NBuckets // Hash Buckets
- + sizeof(uint32_t) * NumHashed; // Hash Values
-}
-template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
- Buf = writeHeader(Buf);
+ // Computes bloom filter size in word size. We want to allocate 8
+ // bits for each symbol. It must be a power of two.
if (Symbols.empty())
- return;
- Buf = writeBloomFilter(Buf);
- writeHashTable(Buf);
+ MaskWords = 1;
+ else
+ MaskWords = NextPowerOf2((Symbols.size() - 1) / sizeof(uintX_t));
+
+ Size = 16; // Header
+ Size += sizeof(uintX_t) * MaskWords; // Bloom filter
+ Size += NBuckets * 4; // Hash buckets
+ Size += Symbols.size() * 4; // Hash values
}
-template <class ELFT>
-uint8_t *GnuHashTableSection<ELFT>::writeHeader(uint8_t *Buf) {
+template <class ELFT> void GnuHashTableSection<ELFT>::writeTo(uint8_t *Buf) {
+ // Write a header.
const endianness E = ELFT::TargetEndianness;
write32<E>(Buf, NBuckets);
write32<E>(Buf + 4, In<ELFT>::DynSymTab->getNumSymbols() - Symbols.size());
write32<E>(Buf + 8, MaskWords);
- write32<E>(Buf + 12, Shift2);
- return Buf + 16;
+ write32<E>(Buf + 12, getShift2());
+ Buf += 16;
+
+ writeBloomFilter(Buf);
+ Buf += sizeof(uintX_t) * MaskWords;
+
+ writeHashTable(Buf);
}
template <class ELFT>
-uint8_t *GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
+void GnuHashTableSection<ELFT>::writeBloomFilter(uint8_t *Buf) {
typedef typename ELFT::Off Elf_Off;
-
const unsigned C = sizeof(uintX_t) * 8;
- auto *Masks = reinterpret_cast<Elf_Off *>(Buf);
- for (const SymbolData &Sym : Symbols) {
- size_t Pos = (Sym.Hash / C) & (MaskWords - 1);
- uintX_t V = (uintX_t(1) << (Sym.Hash % C)) |
- (uintX_t(1) << ((Sym.Hash >> Shift2) % C));
- Masks[Pos] |= V;
+ auto *Filter = reinterpret_cast<Elf_Off *>(Buf);
+ for (const Entry &Sym : Symbols) {
+ size_t I = (Sym.Hash / C) & (MaskWords - 1);
+ Filter[I] |= uintX_t(1) << (Sym.Hash % C);
+ Filter[I] |= uintX_t(1) << ((Sym.Hash >> getShift2()) % C);
}
- return Buf + sizeof(uintX_t) * MaskWords;
}
template <class ELFT>
@@ -1538,25 +1500,30 @@ void GnuHashTableSection<ELFT>::writeHas
// A 32-bit integer type in the target endianness.
typedef typename ELFT::Word Elf_Word;
+ // Group symbols by hash value.
+ std::vector<std::vector<Entry>> Syms(NBuckets);
+ for (const Entry &Ent : Symbols)
+ Syms[Ent.Hash % NBuckets].push_back(Ent);
+
+ // Write hash buckets. Hash buckets contain indices in the following
+ // hash value table.
Elf_Word *Buckets = reinterpret_cast<Elf_Word *>(Buf);
+ for (size_t I = 0; I < NBuckets; ++I)
+ if (!Syms[I].empty())
+ Buckets[I] = Syms[I][0].Body->DynsymIndex;
+
+ // Write a hash value table. It represents a sequence of chains that
+ // share the same hash modulo value. The last element of each chain
+ // is terminated by LSB 1.
Elf_Word *Values = Buckets + NBuckets;
-
- int PrevBucket = -1;
- int I = 0;
- for (const SymbolData &Sym : Symbols) {
- int Bucket = Sym.Hash % NBuckets;
- assert(PrevBucket <= Bucket);
- if (Bucket != PrevBucket) {
- Buckets[Bucket] = Sym.Body->DynsymIndex;
- PrevBucket = Bucket;
- if (I > 0)
- Values[I - 1] |= 1;
- }
- Values[I] = Sym.Hash & ~1;
- ++I;
+ size_t I = 0;
+ for (std::vector<Entry> &Vec : Syms) {
+ if (Vec.empty())
+ continue;
+ for (const Entry &Ent : makeArrayRef(Vec).drop_back())
+ Values[I++] = Ent.Hash & ~1;
+ Values[I++] = Vec.back().Hash | 1;
}
- if (I > 0)
- Values[I - 1] |= 1;
}
static uint32_t hashGnu(StringRef Name) {
@@ -1566,34 +1533,50 @@ static uint32_t hashGnu(StringRef Name)
return H;
}
+// Returns a number of hash buckets to accomodate given number of elements.
+// We want to choose a moderate number that is not too small (which
+// causes too many hash collisions) and not too large (which wastes
+// disk space.)
+//
+// We return a prime number because it (is believed to) achieve good
+// hash distribution.
+static size_t getBucketSize(size_t NumSymbols) {
+ // List of largest prime numbers that are not greater than 2^n + 1.
+ for (size_t N : {131071, 65521, 32749, 16381, 8191, 4093, 2039, 1021, 509,
+ 251, 127, 61, 31, 13, 7, 3, 1})
+ if (N <= NumSymbols)
+ return N;
+ return 0;
+}
+
// Add symbols to this symbol hash table. Note that this function
// destructively sort a given vector -- which is needed because
// GNU-style hash table places some sorting requirements.
template <class ELFT>
void GnuHashTableSection<ELFT>::addSymbols(std::vector<SymbolTableEntry> &V) {
- // Ideally this will just be 'auto' but GCC 6.1 is not able
- // to deduce it correctly.
+ // We cannot use 'auto' for Mid because GCC 6.1 cannot deduce
+ // its type correctly.
std::vector<SymbolTableEntry>::iterator Mid =
std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) {
return S.Symbol->isUndefined();
});
if (Mid == V.end())
return;
- for (auto I = Mid, E = V.end(); I != E; ++I) {
- SymbolBody *B = I->Symbol;
- size_t StrOff = I->StrTabOffset;
- Symbols.push_back({B, StrOff, hashGnu(B->getName())});
+
+ for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) {
+ SymbolBody *B = Ent.Symbol;
+ Symbols.push_back({B, Ent.StrTabOffset, hashGnu(B->getName())});
}
- unsigned NBuckets = calcNBuckets(Symbols.size());
+ NBuckets = getBucketSize(Symbols.size());
std::stable_sort(Symbols.begin(), Symbols.end(),
- [&](const SymbolData &L, const SymbolData &R) {
+ [&](const Entry &L, const Entry &R) {
return L.Hash % NBuckets < R.Hash % NBuckets;
});
V.erase(Mid, V.end());
- for (const SymbolData &Sym : Symbols)
- V.push_back({Sym.Body, Sym.STName});
+ for (const Entry &Ent : Symbols)
+ V.push_back({Ent.Body, Ent.StrTabOffset});
}
template <class ELFT>
Modified: lld/trunk/ELF/SyntheticSections.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/ELF/SyntheticSections.h?rev=296570&r1=296569&r2=296570&view=diff
==============================================================================
--- lld/trunk/ELF/SyntheticSections.h (original)
+++ lld/trunk/ELF/SyntheticSections.h Tue Feb 28 20:51:42 2017
@@ -445,25 +445,21 @@ public:
void addSymbols(std::vector<SymbolTableEntry> &Symbols);
private:
- static unsigned calcNBuckets(unsigned NumHashed);
- static unsigned calcMaskWords(unsigned NumHashed);
+ size_t getShift2() const { return ELFT::Is64Bits ? 6 : 5; }
- uint8_t *writeHeader(uint8_t *Buf);
- uint8_t *writeBloomFilter(uint8_t *Buf);
+ void writeBloomFilter(uint8_t *Buf);
void writeHashTable(uint8_t *Buf);
- struct SymbolData {
+ struct Entry {
SymbolBody *Body;
- size_t STName;
+ size_t StrTabOffset;
uint32_t Hash;
};
- std::vector<SymbolData> Symbols;
-
- unsigned MaskWords;
- unsigned NBuckets;
- unsigned Shift2;
- uintX_t Size = 0;
+ std::vector<Entry> Symbols;
+ size_t MaskWords;
+ size_t NBuckets;
+ size_t Size = 0;
};
template <class ELFT> class HashTableSection final : public SyntheticSection {
More information about the llvm-commits
mailing list