Attempts at speeding up StringTableBuilder

Fri Oct 14 11:23:36 PDT 2016

I have put some effort to try to speed up StringTableBuilder. The last
thing that worked was committed as r284249.

The main difficulty in optimizing it is the large number of strings it
has to handle. In the case of xul, one of the string tables has
14_375_801 strings added, out of which only 1_726_762 are unique.

The things I tried:

* Instead of returning size_t from add, pass in a size_t* to add. This
allows us to remember all StringRefs and only do the merging in
finalize. This would be extra helpful for the -O2 case by not needing
an extra hash lookup. The idea for -O1 was to avoid hash resizing and
improve cache hit by calling StringIndexMap.reserve. Unfortunately
given how many strings are duplicated this is not profitable.

* Using a DenseSet with just an unsigned in it and a side std::vector
with the rest of the information. The idea is that the vector doesn't
contain empty keys, so it should be denser. This reduced the cache
misses accessing the set, but the extra vector compensated for it.

* Creating the string buffer incrementally in add. The idea is that
then we don't need to store the pointer to the string. We can find out
what the string is with just the offset. This was probably the most
promising. It reduced the total number of cache misses reported by
perf stat, but the overall time didn't improve significantly. This
also makes -O2 substantially harder to implement. I have attached the
patch that implements this (note that -O2 is not implemented).

* Not merging constants to see if special casing them would make a
difference. No test speeds ups by even 1%.

In summary, it seems the string merging at -O1 is as fast as it gets
short of someone knowing a crazy algorithm. At -O2 it should be
possible to avoid the second hash lookup by passing a size_t* to add,
but it is not clear if that would be worth the code complexity.

Cheers,
Rafael
-------------- next part --------------

diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h
index e63a353..a924cb1 100644
--- a/include/llvm/MC/StringTableBuilder.h
+++ b/include/llvm/MC/StringTableBuilder.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_MC_STRINGTABLEBUILDER_H
 #define LLVM_MC_STRINGTABLEBUILDER_H
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/DenseMap.h"
 #include <cassert>
 
 namespace llvm {
@@ -35,14 +35,20 @@ public:
   uint32_t hash() const { return Hash; }
 };
 
+struct CachedHashString2 {
+  size_t OffsetInBuffer;
+  uint32_t Size;
+  uint32_t Hash;
+};
+
 /// \brief Utility for building string tables with deduplicated suffixes.
 class StringTableBuilder {
 public:
   enum Kind { ELF, WinCOFF, MachO, RAW };
 
 private:
-  DenseMap<CachedHashString, size_t> StringIndexMap;
-  size_t Size = 0;
+  DenseSet<CachedHashString2> StringIndexMap;
+  SmallString<0> StringBuffer;
   Kind K;
   unsigned Alignment;
   bool Finalized = false;
@@ -73,7 +79,7 @@ public:
   size_t getOffset(CachedHashString S) const;
   size_t getOffset(StringRef S) const { return getOffset(CachedHashString(S)); }
 
-  size_t getSize() const { return Size; }
+  size_t getSize() const { return StringBuffer.size(); }
   void clear();
 
   void write(raw_ostream &OS) const;
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index e5d2a4a..5f0b74a 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -17,45 +17,53 @@
 
 using namespace llvm;
 
+struct LookupKey {
+  const char *BufferStart;
+  CachedHashString Key;
+};
+
 namespace llvm {
-template <> struct DenseMapInfo<CachedHashString> {
-  static CachedHashString getEmptyKey() {
-    StringRef S = DenseMapInfo<StringRef>::getEmptyKey();
-    return {S, 0};
-  }
-  static CachedHashString getTombstoneKey() {
-    StringRef S = DenseMapInfo<StringRef>::getTombstoneKey();
-    return {S, 0};
-  }
-  static unsigned getHashValue(CachedHashString Val) {
+template <> struct DenseMapInfo<CachedHashString2> {
+  static CachedHashString2 getEmptyKey() { return {(size_t)-1, 0, 0}; }
+  static CachedHashString2 getTombstoneKey() { return {(size_t)-2, 0, 0}; }
+  static unsigned getHashValue(CachedHashString2 Val) {
     assert(!isEqual(Val, getEmptyKey()) && "Cannot hash the empty key!");
     assert(!isEqual(Val, getTombstoneKey()) &&
            "Cannot hash the tombstone key!");
-    return Val.hash();
+    return Val.Hash;
+  }
+  static bool isEqual(CachedHashString2 A, CachedHashString2 B) {
+    return A.OffsetInBuffer == B.OffsetInBuffer;
   }
-  static bool isEqual(CachedHashString A, CachedHashString B) {
-    return DenseMapInfo<StringRef>::isEqual(A.val(), B.val());
+  static bool isEqual(LookupKey A, CachedHashString2 B) {
+    if (A.Key.hash() != B.Hash || isEqual(B, getEmptyKey()) ||
+        isEqual(B, getTombstoneKey()))
+      return false;
+    const char *P = A.BufferStart + B.OffsetInBuffer;
+    StringRef BS(P, B.Size);
+    return A.Key.val() == BS;
   }
+  static unsigned getHashValue(LookupKey Val) { return Val.Key.hash(); }
 };
 }
 
 StringTableBuilder::~StringTableBuilder() {}
 
 void StringTableBuilder::initSize() {
+  StringBuffer.clear();
   // Account for leading bytes in table so that offsets returned from add are
   // correct.
   switch (K) {
   case RAW:
-    Size = 0;
     break;
   case MachO:
   case ELF:
     // Start the table with a NUL byte.
-    Size = 1;
+    StringBuffer.resize(1);
     break;
   case WinCOFF:
     // Make room to write the table size later.
-    Size = 4;
+    StringBuffer.resize(4);
     break;
   }
 }
@@ -77,55 +85,10 @@ typedef std::pair<CachedHashString, size_t> StringPair;
 
 void StringTableBuilder::write(uint8_t *Buf) const {
   assert(isFinalized());
-  for (const StringPair &P : StringIndexMap) {
-    StringRef Data = P.first.val();
-    if (!Data.empty())
-      memcpy(Buf + P.second, Data.data(), Data.size());
-  }
+  memcpy(Buf, StringBuffer.data(), StringBuffer.size());
   if (K != WinCOFF)
     return;
-  support::endian::write32le(Buf, Size);
-}
-
-// Returns the character at Pos from end of a string.
-static int charTailAt(StringPair *P, size_t Pos) {
-  StringRef S = P->first.val();
-  if (Pos >= S.size())
-    return -1;
-  return (unsigned char)S[S.size() - Pos - 1];
-}
-
-// Three-way radix quicksort. This is much faster than std::sort with strcmp
-// because it does not compare characters that we already know the same.
-static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
-tailcall:
-  if (End - Begin <= 1)
-    return;
-
-  // Partition items. Items in [Begin, P) are greater than the pivot,
-  // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
-  int Pivot = charTailAt(*Begin, Pos);
-  StringPair **P = Begin;
-  StringPair **Q = End;
-  for (StringPair **R = Begin + 1; R < Q;) {
-    int C = charTailAt(*R, Pos);
-    if (C > Pivot)
-      std::swap(*P++, *R++);
-    else if (C < Pivot)
-      std::swap(*--Q, *R);
-    else
-      R++;
-  }
-
-  multikey_qsort(Begin, P, Pos);
-  multikey_qsort(Q, End, Pos);
-  if (Pivot != -1) {
-    // qsort(P, Q, Pos + 1), but with tail call optimization.
-    Begin = P;
-    End = Q;
-    ++Pos;
-    goto tailcall;
-  }
+  support::endian::write32le(Buf, StringBuffer.size());
 }
 
 void StringTableBuilder::finalize() {
@@ -139,55 +102,25 @@ void StringTableBuilder::finalizeInOrder() {
 void StringTableBuilder::finalizeStringTable(bool Optimize) {
   Finalized = true;
 
-  if (Optimize) {
-    std::vector<StringPair *> Strings;
-    Strings.reserve(StringIndexMap.size());
-    for (StringPair &P : StringIndexMap)
-      Strings.push_back(&P);
-
-    if (!Strings.empty()) {
-      // If we're optimizing, sort by name. If not, sort by previously assigned
-      // offset.
-      multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
-    }
-
-    initSize();
-
-    StringRef Previous;
-    for (StringPair *P : Strings) {
-      StringRef S = P->first.val();
-      if (Previous.endswith(S)) {
-        size_t Pos = Size - S.size() - (K != RAW);
-        if (!(Pos & (Alignment - 1))) {
-          P->second = Pos;
-          continue;
-        }
-      }
-
-      Size = alignTo(Size, Alignment);
-      P->second = Size;
-
-      Size += S.size();
-      if (K != RAW)
-        ++Size;
-      Previous = S;
-    }
+  if (K == MachO) {
+    // Pad to multiple of 4.
+    size_t Pos = alignTo(StringBuffer.size(), Alignment);
+    StringBuffer.resize(Pos);
   }
-
-  if (K == MachO)
-    Size = alignTo(Size, 4); // Pad to multiple of 4.
 }
 
 void StringTableBuilder::clear() {
   Finalized = false;
   StringIndexMap.clear();
+  initSize();
 }
 
 size_t StringTableBuilder::getOffset(CachedHashString S) const {
   assert(isFinalized());
-  auto I = StringIndexMap.find(S);
+  LookupKey Key{&StringBuffer[0], S};
+  auto I = StringIndexMap.find_as(Key);
   assert(I != StringIndexMap.end() && "String is not in table!");
-  return I->second;
+  return I->OffsetInBuffer;
 }
 
 size_t StringTableBuilder::add(CachedHashString S) {
@@ -195,11 +128,16 @@ size_t StringTableBuilder::add(CachedHashString S) {
     assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
 
   assert(!isFinalized());
-  auto P = StringIndexMap.insert(std::make_pair(S, 0));
+  CachedHashString2 Key{0, S.size(), S.hash()};
+  LookupKey LKey{StringBuffer.data(), S};
+  auto P = StringIndexMap.insert_as(Key, LKey);
   if (P.second) {
-    size_t Start = alignTo(Size, Alignment);
-    P.first->second = Start;
-    Size = Start + S.size() + (K != RAW);
+    size_t Start = alignTo(StringBuffer.size(), Alignment);
+    StringBuffer.resize(Start);
+    P.first->OffsetInBuffer = Start;
+    StringBuffer.append(S.val());
+    if (K != RAW)
+      StringBuffer.push_back('\0');
   }
-  return P.first->second;
+  return P.first->OffsetInBuffer;
 }