[clang] 1285172 - [include-mapping] Implement language separation in stdlib recognizer library

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 3 08:05:34 PST 2023


Author: Viktoriia Bakalova
Date: 2023-02-03T17:05:25+01:00
New Revision: 1285172c21ef4867d9f895c0b2ab0f338c46e36f

URL: https://github.com/llvm/llvm-project/commit/1285172c21ef4867d9f895c0b2ab0f338c46e36f
DIFF: https://github.com/llvm/llvm-project/commit/1285172c21ef4867d9f895c0b2ab0f338c46e36f.diff

LOG: [include-mapping] Implement language separation in stdlib recognizer library

Differential Revision: https://reviews.llvm.org/D142992

Added: 
    

Modified: 
    clang/include/clang/Tooling/Inclusions/StandardLibrary.h
    clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
    clang/unittests/Tooling/StandardLibraryTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
index 6dc8d6d09390b..60509a9cebcab 100644
--- a/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
+++ b/clang/include/clang/Tooling/Inclusions/StandardLibrary.h
@@ -30,6 +30,7 @@ namespace tooling {
 namespace stdlib {
 
 class Symbol;
+enum class Lang { C = 0, CXX, LastValue = CXX };
 
 // A standard library header, such as <iostream>
 // Lightweight class, in fact just an index into a table.
@@ -37,9 +38,10 @@ class Symbol;
 // "<cstdio>" and "<stdio.h>" (and their symbols) are treated differently.
 class Header {
 public:
-  static std::vector<Header> all();
+  static std::vector<Header> all(Lang L = Lang::CXX);
   // Name should contain the angle brackets, e.g. "<vector>".
-  static std::optional<Header> named(llvm::StringRef Name);
+  static std::optional<Header> named(llvm::StringRef Name,
+                                     Lang Language = Lang::CXX);
 
   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
     return OS << H.name();
@@ -47,8 +49,10 @@ class Header {
   llvm::StringRef name() const;
 
 private:
-  Header(unsigned ID) : ID(ID) {}
+  Header(unsigned ID, Lang Language) : ID(ID), Language(Language) {}
   unsigned ID;
+  Lang Language;
+
   friend Symbol;
   friend llvm::DenseMapInfo<Header>;
   friend bool operator==(const Header &L, const Header &R) {
@@ -64,11 +68,11 @@ class Header {
 // for them.
 class Symbol {
 public:
-  static std::vector<Symbol> all();
+  static std::vector<Symbol> all(Lang L = Lang::CXX);
   /// \p Scope should have the trailing "::", for example:
   /// named("std::chrono::", "system_clock")
-  static std::optional<Symbol> named(llvm::StringRef Scope,
-                                      llvm::StringRef Name);
+  static std::optional<Symbol>
+  named(llvm::StringRef Scope, llvm::StringRef Name, Lang Language = Lang::CXX);
 
   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
     return OS << S.qualified_name();
@@ -82,8 +86,10 @@ class Symbol {
   llvm::SmallVector<Header> headers() const;
 
 private:
-  Symbol(unsigned ID) : ID(ID) {}
+  Symbol(unsigned ID, Lang Language) : ID(ID), Language(Language) {}
   unsigned ID;
+  Lang Language;
+
   friend class Recognizer;
   friend llvm::DenseMapInfo<Symbol>;
   friend bool operator==(const Symbol &L, const Symbol &R) {
@@ -114,10 +120,12 @@ namespace llvm {
 
 template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {
   static inline clang::tooling::stdlib::Header getEmptyKey() {
-    return clang::tooling::stdlib::Header(-1);
+    return clang::tooling::stdlib::Header(-1,
+                                          clang::tooling::stdlib::Lang::CXX);
   }
   static inline clang::tooling::stdlib::Header getTombstoneKey() {
-    return clang::tooling::stdlib::Header(-2);
+    return clang::tooling::stdlib::Header(-2,
+                                          clang::tooling::stdlib::Lang::CXX);
   }
   static unsigned getHashValue(const clang::tooling::stdlib::Header &H) {
     return hash_value(H.ID);
@@ -130,10 +138,12 @@ template <> struct DenseMapInfo<clang::tooling::stdlib::Header> {
 
 template <> struct DenseMapInfo<clang::tooling::stdlib::Symbol> {
   static inline clang::tooling::stdlib::Symbol getEmptyKey() {
-    return clang::tooling::stdlib::Symbol(-1);
+    return clang::tooling::stdlib::Symbol(-1,
+                                          clang::tooling::stdlib::Lang::CXX);
   }
   static inline clang::tooling::stdlib::Symbol getTombstoneKey() {
-    return clang::tooling::stdlib::Symbol(-2);
+    return clang::tooling::stdlib::Symbol(-2,
+                                          clang::tooling::stdlib::Lang::CXX);
   }
   static unsigned getHashValue(const clang::tooling::stdlib::Symbol &S) {
     return hash_value(S.ID);

diff  --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
index 9d06657df18a2..add8414eaf091 100644
--- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
+++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "clang/AST/Decl.h"
+#include "clang/Basic/LangOptions.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 
@@ -15,46 +16,74 @@ namespace clang {
 namespace tooling {
 namespace stdlib {
 
-// Header::ID => header name
-static llvm::StringRef *HeaderNames;
-// Header name => Header::ID
-static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
-
-static unsigned SymbolCount = 0;
-// Symbol::ID => symbol qualified_name/name/scope
-static struct SymbolName {
-  const char *Data;  // std::vector
-  unsigned ScopeLen; // ~~~~~
-  unsigned NameLen;  //      ~~~~~~
-} *SymbolNames;
+namespace {
 // Symbol name -> Symbol::ID, within a namespace.
 using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
-static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
-// Symbol::ID => Header::ID
-static unsigned *SymbolHeaderIDs;
 
-static int initialize() {
-  SymbolCount = 0;
-#define SYMBOL(Name, NS, Header) ++SymbolCount;
+// A Mapping per language.
+struct SymbolHeaderMapping {
+  llvm::StringRef *HeaderNames = nullptr;
+  // Header name => Header::ID
+  llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
+
+  unsigned SymbolCount = 0;
+  // Symbol::ID => symbol qualified_name/name/scope
+  struct SymbolName {
+    const char *Data;  // std::vector
+    unsigned ScopeLen; // ~~~~~
+    unsigned NameLen;  //      ~~~~~~
+  } *SymbolNames = nullptr;
+  // Symbol name -> Symbol::ID, within a namespace.
+  llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols = nullptr;
+  // Symbol::ID => Header::ID
+  unsigned *SymbolHeaderIDs = nullptr;
+};
+} // namespace
+static SymbolHeaderMapping
+    *LanguageMappings[static_cast<unsigned>(Lang::LastValue) + 1];
+static const SymbolHeaderMapping *getMappingPerLang(Lang L) {
+  return LanguageMappings[static_cast<unsigned>(L)];
+}
+
+static int countSymbols(Lang Language) {
+  unsigned SymCount = 0;
+#define SYMBOL(Name, NS, Header) ++SymCount;
+  switch (Language) {
+  case Lang::C:
 #include "clang/Tooling/Inclusions/CSymbolMap.inc"
+    break;
+  case Lang::CXX:
 #include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+    break;
+  }
 #undef SYMBOL
-  SymbolNames =
-      new std::remove_reference_t<decltype(*SymbolNames)>[SymbolCount];
-  SymbolHeaderIDs =
-      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymbolCount];
-  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
-  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
+  return SymCount;
+}
+
+static int initialize(Lang Language) {
+  SymbolHeaderMapping *Mapping = new SymbolHeaderMapping();
+  LanguageMappings[static_cast<unsigned>(Language)] = Mapping;
 
+  unsigned SymCount = countSymbols(Language);
+  Mapping->SymbolCount = SymCount;
+  Mapping->SymbolNames =
+      new std::remove_reference_t<decltype(*Mapping->SymbolNames)>[SymCount];
+  Mapping->SymbolHeaderIDs = new std::remove_reference_t<
+      decltype(*Mapping->SymbolHeaderIDs)>[SymCount];
+  Mapping->NamespaceSymbols =
+      new std::remove_reference_t<decltype(*Mapping->NamespaceSymbols)>;
+  Mapping->HeaderIDs =
+      new std::remove_reference_t<decltype(*Mapping->HeaderIDs)>;
   auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
-    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
+    auto R = Mapping->NamespaceSymbols->try_emplace(NS, nullptr);
     if (R.second)
       R.first->second = new NSSymbolMap();
     return *R.first->second;
   };
 
   auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
-    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
+    return Mapping->HeaderIDs->try_emplace(Header, Mapping->HeaderIDs->size())
+        .first->second;
   };
 
   auto Add = [&, SymIndex(0)](llvm::StringRef QName, unsigned NSLen,
@@ -66,9 +95,9 @@ static int initialize() {
       NSLen = 0;
     }
 
-    SymbolNames[SymIndex] = {QName.data(), NSLen,
-                             static_cast<unsigned int>(QName.size() - NSLen)};
-    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+    Mapping->SymbolNames[SymIndex] = {
+        QName.data(), NSLen, static_cast<unsigned int>(QName.size() - NSLen)};
+    Mapping->SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
 
     NSSymbolMap &NSSymbols = AddNS(QName.take_front(NSLen));
     NSSymbols.try_emplace(QName.drop_front(NSLen), SymIndex);
@@ -76,70 +105,89 @@ static int initialize() {
     ++SymIndex;
   };
 #define SYMBOL(Name, NS, Header) Add(#NS #Name, strlen(#NS), #Header);
+  switch (Language) {
+  case Lang::C:
 #include "clang/Tooling/Inclusions/CSymbolMap.inc"
+    break;
+  case Lang::CXX:
 #include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+    break;
+  }
 #undef SYMBOL
 
-  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
-  for (const auto &E : *HeaderIDs)
-    HeaderNames[E.second] = E.first;
+  Mapping->HeaderNames = new llvm::StringRef[Mapping->HeaderIDs->size()];
+  for (const auto &E : *Mapping->HeaderIDs)
+    Mapping->HeaderNames[E.second] = E.first;
 
   return 0;
 }
 
 static void ensureInitialized() {
-  static int Dummy = initialize();
+  static int Dummy = []() {
+    for (unsigned L = 0; L <= static_cast<unsigned>(Lang::LastValue); ++L)
+      initialize(static_cast<Lang>(L));
+    return 0;
+  }();
   (void)Dummy;
 }
 
-std::vector<Header> Header::all() {
+std::vector<Header> Header::all(Lang L) {
   ensureInitialized();
   std::vector<Header> Result;
-  Result.reserve(HeaderIDs->size());
-  for (unsigned I = 0, E = HeaderIDs->size(); I < E; ++I)
-    Result.push_back(Header(I));
+  const auto *Mapping = getMappingPerLang(L);
+  Result.reserve(Mapping->HeaderIDs->size());
+  for (unsigned I = 0, E = Mapping->HeaderIDs->size(); I < E; ++I)
+    Result.push_back(Header(I, L));
   return Result;
 }
-std::optional<Header> Header::named(llvm::StringRef Name) {
+std::optional<Header> Header::named(llvm::StringRef Name, Lang L) {
   ensureInitialized();
-  auto It = HeaderIDs->find(Name);
-  if (It == HeaderIDs->end())
+  const auto *Mapping = getMappingPerLang(L);
+  auto It = Mapping->HeaderIDs->find(Name);
+  if (It == Mapping->HeaderIDs->end())
     return std::nullopt;
-  return Header(It->second);
+  return Header(It->second, L);
+}
+llvm::StringRef Header::name() const {
+  return getMappingPerLang(Language)->HeaderNames[ID];
 }
-llvm::StringRef Header::name() const { return HeaderNames[ID]; }
 
-std::vector<Symbol> Symbol::all() {
+std::vector<Symbol> Symbol::all(Lang L) {
   ensureInitialized();
   std::vector<Symbol> Result;
-  Result.reserve(SymbolCount);
-  for (unsigned I = 0, E = SymbolCount; I < E; ++I)
-    Result.push_back(Symbol(I));
+  const auto *Mapping = getMappingPerLang(L);
+  Result.reserve(Mapping->SymbolCount);
+  for (unsigned I = 0, E = Mapping->SymbolCount; I < E; ++I)
+    Result.push_back(Symbol(I, L));
   return Result;
 }
 llvm::StringRef Symbol::scope() const {
-  SymbolName &S = SymbolNames[ID];
+  auto &S = getMappingPerLang(Language)->SymbolNames[ID];
   return StringRef(S.Data, S.ScopeLen);
 }
 llvm::StringRef Symbol::name() const {
-  SymbolName &S = SymbolNames[ID];
+  auto &S = getMappingPerLang(Language)->SymbolNames[ID];
   return StringRef(S.Data + S.ScopeLen, S.NameLen);
 }
 llvm::StringRef Symbol::qualified_name() const {
-  SymbolName &S = SymbolNames[ID];
+  auto &S = getMappingPerLang(Language)->SymbolNames[ID];
   return StringRef(S.Data, S.ScopeLen + S.NameLen);
 }
-std::optional<Symbol> Symbol::named(llvm::StringRef Scope,
-                                     llvm::StringRef Name) {
+std::optional<Symbol> Symbol::named(llvm::StringRef Scope, llvm::StringRef Name,
+                                    Lang L) {
   ensureInitialized();
-  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
+
+  if (NSSymbolMap *NSSymbols =
+          getMappingPerLang(L)->NamespaceSymbols->lookup(Scope)) {
     auto It = NSSymbols->find(Name);
     if (It != NSSymbols->end())
-      return Symbol(It->second);
+      return Symbol(It->second, L);
   }
   return std::nullopt;
 }
-Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+Header Symbol::header() const {
+  return Header(getMappingPerLang(Language)->SymbolHeaderIDs[ID], Language);
+}
 llvm::SmallVector<Header> Symbol::headers() const {
   return {header()}; // FIXME: multiple in case of ambiguity
 }
@@ -147,12 +195,22 @@ llvm::SmallVector<Header> Symbol::headers() const {
 Recognizer::Recognizer() { ensureInitialized(); }
 
 NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
+  if (!D)
+    return nullptr;
+  Lang Language;
+  if (D->getLangOpts().CPlusPlus)
+    Language = Lang::CXX;
+  else if (D->getLangOpts().C11)
+    Language = Lang::C;
+  else
+    return nullptr;
+
   auto It = NamespaceCache.find(D);
   if (It != NamespaceCache.end())
     return It->second;
 
   NSSymbolMap *Result = [&]() -> NSSymbolMap * {
-    if (D && D->isAnonymousNamespace())
+    if (D->isAnonymousNamespace())
       return nullptr;
     // Print the namespace and its parents ommitting inline scopes.
     std::string Scope;
@@ -160,7 +218,7 @@ NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
          ND = llvm::dyn_cast_or_null<NamespaceDecl>(ND->getParent()))
       if (!ND->isInlineNamespace() && !ND->isAnonymousNamespace())
         Scope = ND->getName().str() + "::" + Scope;
-    return NamespaceSymbols->lookup(Scope);
+    return getMappingPerLang(Language)->NamespaceSymbols->lookup(Scope);
   }();
   NamespaceCache.try_emplace(D, Result);
   return Result;
@@ -200,7 +258,7 @@ std::optional<Symbol> Recognizer::operator()(const Decl *D) {
   auto It = Symbols->find(Name);
   if (It == Symbols->end())
     return std::nullopt;
-  return Symbol(It->second);
+  return Symbol(It->second, D->getLangOpts().CPlusPlus? Lang::CXX : Lang::C);
 }
 
 } // namespace stdlib

diff  --git a/clang/unittests/Tooling/StandardLibraryTest.cpp b/clang/unittests/Tooling/StandardLibraryTest.cpp
index f8d9b6a5437d2..136b7b80dc4fb 100644
--- a/clang/unittests/Tooling/StandardLibraryTest.cpp
+++ b/clang/unittests/Tooling/StandardLibraryTest.cpp
@@ -40,6 +40,9 @@ TEST(StdlibTest, All) {
   EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
   EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
 
+  EXPECT_TRUE(stdlib::Header::named("<vector>", stdlib::Lang::CXX));
+  EXPECT_FALSE(stdlib::Header::named("<vector>", stdlib::Lang::C));
+
   auto Vector = stdlib::Symbol::named("std::", "vector");
   EXPECT_TRUE(Vector);
   EXPECT_EQ(Vector->scope(), "std::");
@@ -49,11 +52,21 @@ TEST(StdlibTest, All) {
   EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
   EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
 
+  EXPECT_TRUE(stdlib::Symbol::named("std::", "vector", stdlib::Lang::CXX));
+  EXPECT_FALSE(stdlib::Symbol::named("std::", "vector", stdlib::Lang::C));
+
   EXPECT_EQ(Vector->header(), *VectorH);
   EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
 
   EXPECT_THAT(stdlib::Header::all(), Contains(*VectorH));
   EXPECT_THAT(stdlib::Symbol::all(), Contains(*Vector));
+  EXPECT_FALSE(stdlib::Header::named("<stdint.h>"));
+  EXPECT_FALSE(stdlib::Header::named("<stdint.h>", stdlib::Lang::CXX));
+  EXPECT_TRUE(stdlib::Header::named("<stdint.h>", stdlib::Lang::C));
+
+  EXPECT_FALSE(stdlib::Symbol::named("", "int16_t"));
+  EXPECT_FALSE(stdlib::Symbol::named("", "int16_t", stdlib::Lang::CXX));
+  EXPECT_TRUE(stdlib::Symbol::named("", "int16_t", stdlib::Lang::C));
 }
 
 TEST(StdlibTest, Recognizer) {
@@ -104,10 +117,14 @@ TEST(StdlibTest, Recognizer) {
 
   EXPECT_EQ(Recognizer(&VectorNonstd), std::nullopt);
   EXPECT_EQ(Recognizer(Vec), stdlib::Symbol::named("std::", "vector"));
+  EXPECT_EQ(Recognizer(Vec),
+            stdlib::Symbol::named("std::", "vector", stdlib::Lang::CXX));
   EXPECT_EQ(Recognizer(Nest), stdlib::Symbol::named("std::", "vector"));
   EXPECT_EQ(Recognizer(Clock),
             stdlib::Symbol::named("std::chrono::", "system_clock"));
   EXPECT_EQ(Recognizer(CDivT), stdlib::Symbol::named("", "div_t"));
+  EXPECT_EQ(Recognizer(CDivT),
+            stdlib::Symbol::named("", "div_t", stdlib::Lang::C));
   EXPECT_EQ(Recognizer(Sec), std::nullopt);
 }
 


        


More information about the cfe-commits mailing list