[clang] 1285172 - [include-mapping] Implement language separation in stdlib recognizer library
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 3 08:05:34 PST 2023
Author: Viktoriia Bakalova
Date: 2023-02-03T17:05:25+01:00
New Revision: 1285172c21ef4867d9f895c0b2ab0f338c46e36f
URL: https://github.com/llvm/llvm-project/commit/1285172c21ef4867d9f895c0b2ab0f338c46e36f
DIFF: https://github.com/llvm/llvm-project/commit/1285172c21ef4867d9f895c0b2ab0f338c46e36f.diff
LOG: [include-mapping] Implement language separation in stdlib recognizer library
Differential Revision: https://reviews.llvm.org/D142992
Added:
Modified:
clang/include/clang/Tooling/Inclusions/StandardLibrary.h
clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
clang/unittests/Tooling/StandardLibraryTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
index 6dc8d6d09390b..60509a9cebcab 100644
--- a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
+++ b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
@@ -30,6 +30,7 @@ namespace tooling {
namespace stdlib {
class Symbol;
+enum class Lang { C = 0, CXX, LastValue = CXX };
// A standard library header, such as <iostream>
// Lightweight class, in fact just an index into a table.
@@ -37,9 +38,10 @@ class Symbol;
// "<cstdio>" and "<stdio.h>" (and their symbols) are treated differently.
class Header {
public:
- static std::vector<Header> all();
+ static std::vector<Header> all(Lang L = Lang::CXX);
// Name should contain the angle brackets, e.g. "<vector>".
- static std::optional<Header> named(llvm::StringRef Name);
+ static std::optional<Header> named(llvm::StringRef Name,
+ Lang Language = Lang::CXX);
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
return OS << H.name();
@@ -47,8 +49,10 @@ class Header {
llvm::StringRef name() const;
private:
- Header(unsigned ID) : ID(ID) {}
+ Header(unsigned ID, Lang Language) : ID(ID), Language(Language) {}
unsigned ID;
+ Lang Language;
+
friend Symbol;
friend llvm::DenseMapInfo<Header>;
friend bool operator==(const Header &L, const Header &R) {
@@ -64,11 +68,11 @@ class Header {
// for them.
class Symbol {
public:
- static std::vector<Symbol> all();
+ static std::vector<Symbol> all(Lang L = Lang::CXX);
/// \p Scope should have the trailing "::", for example:
/// named("std::chrono::", "system_clock")
- static std::optional<Symbol> named(llvm::StringRef Scope,
- llvm::StringRef Name);
+ static std::optional<Symbol>
+ named(llvm::StringRef Scope, llvm::StringRef Name, Lang Language = Lang::CXX);
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
return OS << S.qualified_name();
@@ -82,8 +86,10 @@ class Symbol {
llvm::SmallVector<Header> headers() const;
private:
- Symbol(unsigned ID) : ID(ID) {}
+ Symbol(unsigned ID, Lang Language) : ID(ID), Language(Language) {}
unsigned ID;
+ Lang Language;
+
friend class Recognizer;
friend llvm::DenseMapInfo<Symbol>;
friend bool operator==(const Symbol &L, const Symbol &R) {
@@ -114,10 +120,12 @@ namespace llvm {
template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {
static inline clang::tooling::stdlib::Header getEmptyKey() {
- return clang::tooling::stdlib::Header(-1);
+ return clang::tooling::stdlib::Header(-1,
+ clang::tooling::stdlib::Lang::CXX);
}
static inline clang::tooling::stdlib::Header getTombstoneKey() {
- return clang::tooling::stdlib::Header(-2);
+ return clang::tooling::stdlib::Header(-2,
+ clang::tooling::stdlib::Lang::CXX);
}
static unsigned getHashValue(const clang::tooling::stdlib::Header &H) {
return hash_value(H.ID);
@@ -130,10 +138,12 @@ template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {
template <> struct DenseMapInfo<clang::tooling::stdlib::Symbol> {
static inline clang::tooling::stdlib::Symbol getEmptyKey() {
- return clang::tooling::stdlib::Symbol(-1);
+ return clang::tooling::stdlib::Symbol(-1,
+ clang::tooling::stdlib::Lang::CXX);
}
static inline clang::tooling::stdlib::Symbol getTombstoneKey() {
- return clang::tooling::stdlib::Symbol(-2);
+ return clang::tooling::stdlib::Symbol(-2,
+ clang::tooling::stdlib::Lang::CXX);
}
static unsigned getHashValue(const clang::tooling::stdlib::Symbol &S) {
return hash_value(S.ID);
diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
index 9d06657df18a2..add8414eaf091 100644
--- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
+++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
@@ -8,6 +8,7 @@
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "clang/AST/Decl.h"
+#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
@@ -15,46 +16,74 @@ namespace clang {
namespace tooling {
namespace stdlib {
-// Header::ID => header name
-static llvm::StringRef *HeaderNames;
-// Header name => Header::ID
-static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
-
-static unsigned SymbolCount = 0;
-// Symbol::ID => symbol qualified_name/name/scope
-static struct SymbolName {
- const char *Data; // std::vector
- unsigned ScopeLen; // ~~~~~
- unsigned NameLen; // ~~~~~~
-} *SymbolNames;
+namespace {
// Symbol name -> Symbol::ID, within a namespace.
using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
-static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
-// Symbol::ID => Header::ID
-static unsigned *SymbolHeaderIDs;
-static int initialize() {
- SymbolCount = 0;
-#define SYMBOL(Name, NS, Header) ++SymbolCount;
+// A Mapping per language.
+struct SymbolHeaderMapping {
+ llvm::StringRef *HeaderNames = nullptr;
+ // Header name => Header::ID
+ llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
+
+ unsigned SymbolCount = 0;
+ // Symbol::ID => symbol qualified_name/name/scope
+ struct SymbolName {
+ const char *Data; // std::vector
+ unsigned ScopeLen; // ~~~~~
+ unsigned NameLen; // ~~~~~~
+ } *SymbolNames = nullptr;
+ // Symbol name -> Symbol::ID, within a namespace.
+ llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols = nullptr;
+ // Symbol::ID => Header::ID
+ unsigned *SymbolHeaderIDs = nullptr;
+};
+} // namespace
+static SymbolHeaderMapping
+ *LanguageMappings[static_cast<unsigned>(Lang::LastValue) + 1];
+static const SymbolHeaderMapping *getMappingPerLang(Lang L) {
+ return LanguageMappings[static_cast<unsigned>(L)];
+}
+
+static int countSymbols(Lang Language) {
+ unsigned SymCount = 0;
+#define SYMBOL(Name, NS, Header) ++SymCount;
+ switch (Language) {
+ case Lang::C:
#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+ break;
+ case Lang::CXX:
#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+ break;
+ }
#undef SYMBOL
- SymbolNames =
- new std::remove_reference_t<decltype(*SymbolNames)>[SymbolCount];
- SymbolHeaderIDs =
- new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymbolCount];
- NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
- HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
+ return SymCount;
+}
+
+static int initialize(Lang Language) {
+ SymbolHeaderMapping *Mapping = new SymbolHeaderMapping();
+ LanguageMappings[static_cast<unsigned>(Language)] = Mapping;
+ unsigned SymCount = countSymbols(Language);
+ Mapping->SymbolCount = SymCount;
+ Mapping->SymbolNames =
+ new std::remove_reference_t<decltype(*Mapping->SymbolNames)>[SymCount];
+ Mapping->SymbolHeaderIDs = new std::remove_reference_t<
+ decltype(*Mapping->SymbolHeaderIDs)>[SymCount];
+ Mapping->NamespaceSymbols =
+ new std::remove_reference_t<decltype(*Mapping->NamespaceSymbols)>;
+ Mapping->HeaderIDs =
+ new std::remove_reference_t<decltype(*Mapping->HeaderIDs)>;
auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
- auto R = NamespaceSymbols->try_emplace(NS, nullptr);
+ auto R = Mapping->NamespaceSymbols->try_emplace(NS, nullptr);
if (R.second)
R.first->second = new NSSymbolMap();
return *R.first->second;
};
auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
- return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
+ return Mapping->HeaderIDs->try_emplace(Header, Mapping->HeaderIDs->size())
+ .first->second;
};
auto Add = [&, SymIndex(0)](llvm::StringRef QName, unsigned NSLen,
@@ -66,9 +95,9 @@ static int initialize() {
NSLen = 0;
}
- SymbolNames[SymIndex] = {QName.data(), NSLen,
- static_cast<unsigned int>(QName.size() - NSLen)};
- SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+ Mapping->SymbolNames[SymIndex] = {
+ QName.data(), NSLen, static_cast<unsigned int>(QName.size() - NSLen)};
+ Mapping->SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
NSSymbolMap &NSSymbols = AddNS(QName.take_front(NSLen));
NSSymbols.try_emplace(QName.drop_front(NSLen), SymIndex);
@@ -76,70 +105,89 @@ static int initialize() {
++SymIndex;
};
#define SYMBOL(Name, NS, Header) Add(#NS #Name, strlen(#NS), #Header);
+ switch (Language) {
+ case Lang::C:
#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+ break;
+ case Lang::CXX:
#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+ break;
+ }
#undef SYMBOL
- HeaderNames = new llvm::StringRef[HeaderIDs->size()];
- for (const auto &E : *HeaderIDs)
- HeaderNames[E.second] = E.first;
+ Mapping->HeaderNames = new llvm::StringRef[Mapping->HeaderIDs->size()];
+ for (const auto &E : *Mapping->HeaderIDs)
+ Mapping->HeaderNames[E.second] = E.first;
return 0;
}
static void ensureInitialized() {
- static int Dummy = initialize();
+ static int Dummy = []() {
+ for (unsigned L = 0; L <= static_cast<unsigned>(Lang::LastValue); ++L)
+ initialize(static_cast<Lang>(L));
+ return 0;
+ }();
(void)Dummy;
}
-std::vector<Header> Header::all() {
+std::vector<Header> Header::all(Lang L) {
ensureInitialized();
std::vector<Header> Result;
- Result.reserve(HeaderIDs->size());
- for (unsigned I = 0, E = HeaderIDs->size(); I < E; ++I)
- Result.push_back(Header(I));
+ const auto *Mapping = getMappingPerLang(L);
+ Result.reserve(Mapping->HeaderIDs->size());
+ for (unsigned I = 0, E = Mapping->HeaderIDs->size(); I < E; ++I)
+ Result.push_back(Header(I, L));
return Result;
}
-std::optional<Header> Header::named(llvm::StringRef Name) {
+std::optional<Header> Header::named(llvm::StringRef Name, Lang L) {
ensureInitialized();
- auto It = HeaderIDs->find(Name);
- if (It == HeaderIDs->end())
+ const auto *Mapping = getMappingPerLang(L);
+ auto It = Mapping->HeaderIDs->find(Name);
+ if (It == Mapping->HeaderIDs->end())
return std::nullopt;
- return Header(It->second);
+ return Header(It->second, L);
+}
+llvm::StringRef Header::name() const {
+ return getMappingPerLang(Language)->HeaderNames[ID];
}
-llvm::StringRef Header::name() const { return HeaderNames[ID]; }
-std::vector<Symbol> Symbol::all() {
+std::vector<Symbol> Symbol::all(Lang L) {
ensureInitialized();
std::vector<Symbol> Result;
- Result.reserve(SymbolCount);
- for (unsigned I = 0, E = SymbolCount; I < E; ++I)
- Result.push_back(Symbol(I));
+ const auto *Mapping = getMappingPerLang(L);
+ Result.reserve(Mapping->SymbolCount);
+ for (unsigned I = 0, E = Mapping->SymbolCount; I < E; ++I)
+ Result.push_back(Symbol(I, L));
return Result;
}
llvm::StringRef Symbol::scope() const {
- SymbolName &S = SymbolNames[ID];
+ auto &S = getMappingPerLang(Language)->SymbolNames[ID];
return StringRef(S.Data, S.ScopeLen);
}
llvm::StringRef Symbol::name() const {
- SymbolName &S = SymbolNames[ID];
+ auto &S = getMappingPerLang(Language)->SymbolNames[ID];
return StringRef(S.Data + S.ScopeLen, S.NameLen);
}
llvm::StringRef Symbol::qualified_name() const {
- SymbolName &S = SymbolNames[ID];
+ auto &S = getMappingPerLang(Language)->SymbolNames[ID];
return StringRef(S.Data, S.ScopeLen + S.NameLen);
}
-std::optional<Symbol> Symbol::named(llvm::StringRef Scope,
- llvm::StringRef Name) {
+std::optional<Symbol> Symbol::named(llvm::StringRef Scope, llvm::StringRef Name,
+ Lang L) {
ensureInitialized();
- if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
+
+ if (NSSymbolMap *NSSymbols =
+ getMappingPerLang(L)->NamespaceSymbols->lookup(Scope)) {
auto It = NSSymbols->find(Name);
if (It != NSSymbols->end())
- return Symbol(It->second);
+ return Symbol(It->second, L);
}
return std::nullopt;
}
-Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+Header Symbol::header() const {
+ return Header(getMappingPerLang(Language)->SymbolHeaderIDs[ID], Language);
+}
llvm::SmallVector<Header> Symbol::headers() const {
return {header()}; // FIXME: multiple in case of ambiguity
}
@@ -147,12 +195,22 @@ llvm::SmallVector<Header> Symbol::headers() const {
Recognizer::Recognizer() { ensureInitialized(); }
NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
+ if (!D)
+ return nullptr;
+ Lang Language;
+ if (D->getLangOpts().CPlusPlus)
+ Language = Lang::CXX;
+ else if (D->getLangOpts().C11)
+ Language = Lang::C;
+ else
+ return nullptr;
+
auto It = NamespaceCache.find(D);
if (It != NamespaceCache.end())
return It->second;
NSSymbolMap *Result = [&]() -> NSSymbolMap * {
- if (D && D->isAnonymousNamespace())
+ if (D->isAnonymousNamespace())
return nullptr;
// Print the namespace and its parents ommitting inline scopes.
std::string Scope;
@@ -160,7 +218,7 @@ NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
ND = llvm::dyn_cast_or_null<NamespaceDecl>(ND->getParent()))
if (!ND->isInlineNamespace() && !ND->isAnonymousNamespace())
Scope = ND->getName().str() + "::" + Scope;
- return NamespaceSymbols->lookup(Scope);
+ return getMappingPerLang(Language)->NamespaceSymbols->lookup(Scope);
}();
NamespaceCache.try_emplace(D, Result);
return Result;
@@ -200,7 +258,7 @@ std::optional<Symbol> Recognizer::operator()(const Decl *D) {
auto It = Symbols->find(Name);
if (It == Symbols->end())
return std::nullopt;
- return Symbol(It->second);
+ return Symbol(It->second, D->getLangOpts().CPlusPlus? Lang::CXX : Lang::C);
}
} // namespace stdlib
diff --git a/clang/unittests/Tooling/StandardLibraryTest.cpp b/clang/unittests/Tooling/StandardLibraryTest.cpp
index f8d9b6a5437d2..136b7b80dc4fb 100644
--- a/clang/unittests/Tooling/StandardLibraryTest.cpp
+++ b/clang/unittests/Tooling/StandardLibraryTest.cpp
@@ -40,6 +40,9 @@ TEST(StdlibTest, All) {
EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
+ EXPECT_TRUE(stdlib::Header::named("<vector>", stdlib::Lang::CXX));
+ EXPECT_FALSE(stdlib::Header::named("<vector>", stdlib::Lang::C));
+
auto Vector = stdlib::Symbol::named("std::", "vector");
EXPECT_TRUE(Vector);
EXPECT_EQ(Vector->scope(), "std::");
@@ -49,11 +52,21 @@ TEST(StdlibTest, All) {
EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
+ EXPECT_TRUE(stdlib::Symbol::named("std::", "vector", stdlib::Lang::CXX));
+ EXPECT_FALSE(stdlib::Symbol::named("std::", "vector", stdlib::Lang::C));
+
EXPECT_EQ(Vector->header(), *VectorH);
EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
EXPECT_THAT(stdlib::Header::all(), Contains(*VectorH));
EXPECT_THAT(stdlib::Symbol::all(), Contains(*Vector));
+ EXPECT_FALSE(stdlib::Header::named("<stdint.h>"));
+ EXPECT_FALSE(stdlib::Header::named("<stdint.h>", stdlib::Lang::CXX));
+ EXPECT_TRUE(stdlib::Header::named("<stdint.h>", stdlib::Lang::C));
+
+ EXPECT_FALSE(stdlib::Symbol::named("", "int16_t"));
+ EXPECT_FALSE(stdlib::Symbol::named("", "int16_t", stdlib::Lang::CXX));
+ EXPECT_TRUE(stdlib::Symbol::named("", "int16_t", stdlib::Lang::C));
}
TEST(StdlibTest, Recognizer) {
@@ -104,10 +117,14 @@ TEST(StdlibTest, Recognizer) {
EXPECT_EQ(Recognizer(&VectorNonstd), std::nullopt);
EXPECT_EQ(Recognizer(Vec), stdlib::Symbol::named("std::", "vector"));
+ EXPECT_EQ(Recognizer(Vec),
+ stdlib::Symbol::named("std::", "vector", stdlib::Lang::CXX));
EXPECT_EQ(Recognizer(Nest), stdlib::Symbol::named("std::", "vector"));
EXPECT_EQ(Recognizer(Clock),
stdlib::Symbol::named("std::chrono::", "system_clock"));
EXPECT_EQ(Recognizer(CDivT), stdlib::Symbol::named("", "div_t"));
+ EXPECT_EQ(Recognizer(CDivT),
+ stdlib::Symbol::named("", "div_t", stdlib::Lang::C));
EXPECT_EQ(Recognizer(Sec), std::nullopt);
}
More information about the cfe-commits
mailing list