[clang] [clang][Tooling] Reuse standard-library symbol descriptors (PR #202663)
David Zbarsky via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 9 07:02:05 PDT 2026
https://github.com/dzbarsky created https://github.com/llvm/llvm-project/pull/202663
StandardLibrary.cpp emits the generated C and C++ symbol mappings once to initialize SymbolHeaderMapping and a second time to count unique symbols. The second expansion costs about 46 KiB of read-only data and constructs a DenseSet during initialization.
Return one SymbolMapping array per language and count the already-grouped qualified names with a linear pass before initializing the mapping from the same array. This removes the duplicate generated tables and the temporary DenseSet without changing the lookup representation.
In an arm64 Release build, StandardLibrary.cpp.o loadable contents decrease from 272,536 to 224,729 bytes (-47,807).
In the LLVM 22 Bazel build, clangd decreases from 52,672,672 to 52,622,704 bytes (-49,968) and stripped clangd decreases from 40,952,336 to 40,902,784 bytes (-49,552). The multicall binary decreases from 162,148,880 to 162,098,928 bytes (-49,952) and its stripped form decreases from 132,207,344 to 132,157,792 bytes (-49,552).
A 50-batch initialization benchmark measured a -1.16% change in elapsed initialization time (95% bootstrap interval: -3.70% to +1.68%; the interval spans zero, so this is within noise). A 300-process `clangd --check` benchmark measured a -1.17% change in CPU time (95% bootstrap interval: -2.64% to -0.00%).
All six existing StdlibTest cases pass, covering C and C++ mappings, experimental symbols, recognizer behavior, and special C mappings.
Work towards #202616
From c5a8f493ce5edb01ec5b0d4fa9a41d83386644a5 Mon Sep 17 00:00:00 2001
From: David Zbarsky <dzbarsky at gmail.com>
Date: Tue, 9 Jun 2026 04:54:39 -0400
Subject: [PATCH] [clang][Tooling] Reuse standard-library symbol descriptors
StandardLibrary.cpp emits the generated C and C++ symbol mappings once to initialize SymbolHeaderMapping and a second time to count unique symbols. The second expansion costs about 46 KiB of read-only data and constructs a DenseSet during initialization.
Return one SymbolMapping array per language and count the already-grouped qualified names with a linear pass before initializing the mapping from the same array. This removes the duplicate generated tables and the temporary DenseSet without changing the lookup representation.
In an arm64 Release build, StandardLibrary.cpp.o loadable contents decrease from 272,536 to 224,729 bytes (-47,807).
In the LLVM 22 Bazel build, clangd decreases from 52,672,672 to 52,622,704 bytes (-49,968) and stripped clangd decreases from 40,952,336 to 40,902,784 bytes (-49,552). The multicall binary decreases from 162,148,880 to 162,098,928 bytes (-49,952) and its stripped form decreases from 132,207,344 to 132,157,792 bytes (-49,552).
A 50-batch initialization benchmark measured a -1.16% change in elapsed initialization time (95% bootstrap interval: -3.70% to +1.68%; the interval spans zero, so this is within noise). A 300-process `clangd --check` benchmark measured a -1.17% change in CPU time (95% bootstrap interval: -2.64% to -0.00%).
All six existing StdlibTest cases pass, covering C and C++ mappings, experimental symbols, recognizer behavior, and special C mappings.
---
.../Inclusions/Stdlib/StandardLibrary.cpp | 75 ++++++++-----------
1 file changed, 33 insertions(+), 42 deletions(-)
diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
index 807a8d8a34ad7..e24b934e01071 100644
--- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
+++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
@@ -10,10 +10,10 @@
#include "clang/AST/Decl.h"
#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include <optional>
namespace clang {
@@ -24,6 +24,12 @@ namespace {
// Symbol name -> Symbol::ID, within a namespace.
using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+struct SymbolMapping {
+ const char *QName;
+ unsigned NSLen;
+ const char *HeaderName;
+};
+
// A Mapping per language.
struct SymbolHeaderMapping {
llvm::StringRef *HeaderNames = nullptr;
@@ -54,37 +60,50 @@ static const SymbolHeaderMapping *getMappingPerLang(Lang L) {
return LanguageMappings[static_cast<unsigned>(L)];
}
-static int countSymbols(Lang Language) {
- ArrayRef<const char *> Symbols;
-#define SYMBOL(Name, NS, Header) #NS #Name,
+static ArrayRef<SymbolMapping> getSymbolMappings(Lang Language) {
+#define SYMBOL(Name, NS, Header) \
+ {#NS #Name, \
+ static_cast<decltype(SymbolMapping::NSLen)>(StringRef(#NS).size()), \
+ #Header},
switch (Language) {
case Lang::C: {
- static constexpr const char *CSymbols[] = {
+ static constexpr SymbolMapping CSymbols[] = {
#include "CSpecialSymbolMap.inc"
#include "CSymbolMap.inc"
};
- Symbols = CSymbols;
- break;
+ return CSymbols;
}
case Lang::CXX: {
- static constexpr const char *CXXSymbols[] = {
+ static constexpr SymbolMapping CXXSymbols[] = {
#include "StdSpecialSymbolMap.inc"
#include "StdSymbolMap.inc"
#include "StdTsSymbolMap.inc"
};
- Symbols = CXXSymbols;
- break;
+ return CXXSymbols;
}
}
#undef SYMBOL
- return llvm::DenseSet<StringRef>(llvm::from_range, Symbols).size();
+ llvm_unreachable("unknown language");
+}
+
+static unsigned countSymbols(ArrayRef<SymbolMapping> Symbols) {
+ unsigned Count = 0;
+ StringRef Previous;
+ for (const SymbolMapping &S : Symbols) {
+ if (Previous != S.QName) {
+ ++Count;
+ Previous = S.QName;
+ }
+ }
+ return Count;
}
static int initialize(Lang Language) {
SymbolHeaderMapping *Mapping = new SymbolHeaderMapping();
LanguageMappings[static_cast<unsigned>(Language)] = Mapping;
- unsigned SymCount = countSymbols(Language);
+ ArrayRef<SymbolMapping> Symbols = getSymbolMappings(Language);
+ unsigned SymCount = countSymbols(Symbols);
Mapping->SymbolCount = SymCount;
Mapping->SymbolNames =
new std::remove_reference_t<decltype(*Mapping->SymbolNames)>[SymCount];
@@ -137,36 +156,8 @@ static int initialize(Lang Language) {
NSSymbols.try_emplace(QName.drop_front(NSLen), SymIndex);
};
- struct Symbol {
- const char *QName;
- unsigned NSLen;
- const char *HeaderName;
- };
-#define SYMBOL(Name, NS, Header) \
- {#NS #Name, static_cast<decltype(Symbol::NSLen)>(StringRef(#NS).size()), \
- #Header},
- switch (Language) {
- case Lang::C: {
- static constexpr Symbol CSymbols[] = {
-#include "CSpecialSymbolMap.inc"
-#include "CSymbolMap.inc"
- };
- for (const Symbol &S : CSymbols)
- Add(S.QName, S.NSLen, S.HeaderName);
- break;
- }
- case Lang::CXX: {
- static constexpr Symbol CXXSymbols[] = {
-#include "StdSpecialSymbolMap.inc"
-#include "StdSymbolMap.inc"
-#include "StdTsSymbolMap.inc"
- };
- for (const Symbol &S : CXXSymbols)
- Add(S.QName, S.NSLen, S.HeaderName);
- break;
- }
- }
-#undef SYMBOL
+ for (const SymbolMapping &S : Symbols)
+ Add(S.QName, S.NSLen, S.HeaderName);
Mapping->HeaderNames = new llvm::StringRef[Mapping->HeaderIDs->size()];
for (const auto &E : *Mapping->HeaderIDs)
More information about the cfe-commits mailing list