[llvm] TableGen: Emit perfect hash function for runtime libcalls (PR #150192)
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 13 14:25:12 PDT 2025
================
@@ -315,8 +325,183 @@ void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const {
"#endif\n\n";
}
+// Hash a name the way StringMap does: xxh3_64bits, truncated to uint32_t (then widened back to the uint64_t return type).
+static uint64_t hash(StringRef Str) {
+ return static_cast<uint32_t>(xxh3_64bits(Str)); // The cast to uint32_t performs the truncation.
+}
+
+static void emitHashFunction(raw_ostream &OS) { // Writes to OS the source text of a `hash` function that repeats the truncated xxh3_64bits computation.
+ OS << "static inline uint64_t hash(StringRef Str) {\n"
+ " return static_cast<uint32_t>(xxh3_64bits(Str));\n"
+ "}\n\n";
+}
+
+/// Return {table size, maximum number of hashes sharing one index} for the set of hashes: the smallest collision limit, and for it the smallest size, that fits.
+static std::pair<int, int>
+computePerfectHashParameters(ArrayRef<uint64_t> Hashes) {
+ const int SizeOverhead = 10; // Candidate sizes are limited to [NumHashes, SizeOverhead * NumHashes).
+ const int NumHashes = Hashes.size(); // NOTE(review): if Hashes is empty the size loop below never runs and the outer loop never returns.
+
+ // Index derived from hash -> number of hashes that landed on that index.
+ DenseMap<uint64_t, int> Table;
+
+ for (int MaxCollisions = 1;; ++MaxCollisions) { // No exit condition; only the return below leaves this loop.
+ for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+ Table.clear();
+
+ bool NeedResize = false;
+ for (uint64_t H : Hashes) {
+ uint64_t Idx = H % static_cast<uint64_t>(N);
+ if (++Table[Idx] > MaxCollisions) {
+ // More than MaxCollisions hashes landed on this index; move on to the next candidate size.
+ NeedResize = true;
+ break;
+ }
+ }
+
+ if (!NeedResize)
+ return {N, MaxCollisions};
+ }
+ }
+
+ llvm_unreachable("loop should terminate");
+}
+
+static std::vector<LookupEntry> // Returns Size buckets of Collisions slots each, flattened into one vector.
+constructPerfectHashTable(ArrayRef<RuntimeLibcallImpl> Keywords,
+ ArrayRef<uint64_t> Hashes, int Size, int Collisions,
+ StringToOffsetTable &OffsetTable) { // NOTE(review): OffsetTable is not referenced in this body.
+ DenseSet<StringRef> Seen;
+ std::vector<LookupEntry> Lookup(Size * Collisions); // Assumes default-constructed entries have TableValue == 0 -- TODO confirm.
+
+ for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
+ StringRef ImplName = LibCallImpl.getLibcallFuncName();
+
+ // We do not want to add repeated entries for cases with the same name, only
+ // an entry for the first, with the name collision enum values immediately
+ // following.
+ if (!Seen.insert(ImplName).second)
+ continue;
+
+ uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1]; // Hashes is indexed by enum value minus one.
+
+ uint64_t Idx = (HashValue % static_cast<uint64_t>(Size)) *
+ static_cast<uint64_t>(Collisions); // Index of the first slot in this hash's bucket.
+
+ bool Found = false;
+ for (int J = 0; J < Collisions; ++J) {
+ LookupEntry &Entry = Lookup[Idx + J];
+ if (Entry.TableValue == 0) { // A zero TableValue is treated as a free slot.
+ Entry.FuncName = ImplName;
+ Entry.TableValue = LibCallImpl.getEnumVal();
+ Entry.Hash = HashValue;
+ Found = true;
+ break;
+ }
+ }
+
+ if (!Found) // Every slot in the bucket was already occupied.
+ reportFatalInternalError("failure to hash " + ImplName);
+ }
+
+ return Lookup;
+}
+
+/// Generate hash table based lookup by name.
+void RuntimeLibcallEmitter::emitNameMatchHashTable(
+ raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
+ std::vector<uint64_t> Hashes(RuntimeLibcallImplDefList.size());
+
+ size_t MaxFuncNameSize = 0;
+ size_t Index = 0;
+ for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
+ StringRef ImplName = LibCallImpl.getLibcallFuncName();
+ MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+ Hashes[Index++] = hash(ImplName);
+ }
+
+ LLVM_DEBUG({
+ for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
+ StringRef ImplName = LibCallImpl.getLibcallFuncName();
+ if (ImplName.size() == MaxFuncNameSize) {
+ dbgs() << "Maximum runtime libcall name size: " << ImplName << '('
+ << MaxFuncNameSize << ")\n";
+ }
+ }
+ });
+
+ // Early exiting on the symbol name provides a significant speedup in the miss
+ // case on the set of symbols in a clang binary. Emit this as an inlinable
+ // precondition in the header.
+ //
+ // The empty check is also used to get sensible behavior on anonymous
+ // functions.
+ //
+ // TODO: It may make more sense to split the search by string size more. There
+ // are a few outliers, most call names are small.
----------------
efriedma-quic wrote:
That implies you're ending up with a really low effective load factor (at least 5 empty entries per filled entry). You can probably save a lot of space using linear probing instead of fixed 6-entry buckets.
https://github.com/llvm/llvm-project/pull/150192
More information about the llvm-commits
mailing list