[clang-tools-extra] r358605 - [clangd] Recognize "don't include me directly" pattern, and suppress include insertion.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 17 11:33:08 PDT 2019


Author: sammccall
Date: Wed Apr 17 11:33:07 2019
New Revision: 358605

URL: http://llvm.org/viewvc/llvm-project?rev=358605&view=rev
Log:
[clangd] Recognize "don't include me directly" pattern, and suppress include insertion.

Summary:
Typically used with umbrella headers, e.g. GTK:

 #if !defined (__GTK_H_INSIDE__) && !defined (GTK_COMPILATION)
 #error "Only <gtk/gtk.h> can be included directly."
 #endif

Heuristic is fairly conservative, a quick code search over github showed
a fair number of hits and few/no false positives. (Not all were umbrella
headers, but I'd be happy avoiding include insertion for all of them).

We may want to relax the heuristic later to catch more cases.

Reviewers: ioeric

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D60815

Modified:
    clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
    clang-tools-extra/trunk/clangd/index/SymbolCollector.h
    clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp?rev=358605&r1=358604&r2=358605&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp Wed Apr 17 11:33:07 2019
@@ -128,48 +128,6 @@ bool shouldCollectIncludePath(index::Sym
   }
 }
 
-bool isSelfContainedHeader(FileID FID, const SourceManager &SM,
-                           const Preprocessor &PP) {
-  const FileEntry *FE = SM.getFileEntryForID(FID);
-  if (!FE)
-    return false;
-  return PP.getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE);
-}
-
-/// Gets a canonical include (URI of the header or <header> or "header") for
-/// header of \p FID (which should usually be the *expansion* file).
-/// Returns None if includes should not be inserted for this file.
-llvm::Optional<std::string>
-getIncludeHeader(llvm::StringRef QName, const SourceManager &SM,
-                 const Preprocessor &PP, FileID FID,
-                 const SymbolCollector::Options &Opts) {
-  const FileEntry *FE = SM.getFileEntryForID(FID);
-  if (!FE || FE->getName().empty())
-    return llvm::None;
-  llvm::StringRef Filename = FE->getName();
-  // If a file is mapped by canonical headers, use that mapping, regardless
-  // of whether it's an otherwise-good header (header guards etc).
-  if (Opts.Includes) {
-    llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
-    // If we had a mapping, always use it.
-    if (Canonical.startswith("<") || Canonical.startswith("\""))
-      return Canonical.str();
-    if (Canonical != Filename)
-      return toURI(SM, Canonical, Opts);
-  }
-  if (!isSelfContainedHeader(FID, SM, PP)) {
-    // A .inc or .def file is often included into a real header to define
-    // symbols (e.g. LLVM tablegen files).
-    if (Filename.endswith(".inc") || Filename.endswith(".def"))
-      return getIncludeHeader(QName, SM, PP,
-                              SM.getFileID(SM.getIncludeLoc(FID)), Opts);
-    // Conservatively refuse to insert #includes to files without guards.
-    return llvm::None;
-  }
-  // Standard case: just insert the file itself.
-  return toURI(SM, Filename, Opts);
-}
-
 // Return the symbol range of the token at \p TokLoc.
 std::pair<SymbolLocation::Position, SymbolLocation::Position>
 getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
@@ -452,9 +410,8 @@ bool SymbolCollector::handleMacroOccuren
 
   std::string Include;
   if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) {
-    if (auto Header =
-            getIncludeHeader(Name->getName(), SM, *PP,
-                             SM.getDecomposedExpansionLoc(DefLoc).first, Opts))
+    if (auto Header = getIncludeHeader(
+            Name->getName(), SM.getDecomposedExpansionLoc(DefLoc).first))
       Include = std::move(*Header);
   }
   S.Signature = Signature;
@@ -533,6 +490,7 @@ void SymbolCollector::finish() {
   ReferencedMacros.clear();
   DeclRefs.clear();
   FilesToIndexCache.clear();
+  HeaderIsSelfContainedCache.clear();
 }
 
 const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
@@ -602,8 +560,7 @@ const Symbol *SymbolCollector::addDeclar
     // Use the expansion location to get the #include header since this is
     // where the symbol is exposed.
     if (auto Header = getIncludeHeader(
-            QName, SM, *PP,
-            SM.getDecomposedExpansionLoc(ND.getLocation()).first, Opts))
+            QName, SM.getDecomposedExpansionLoc(ND.getLocation()).first))
       Include = std::move(*Header);
   }
   if (!Include.empty())
@@ -639,5 +596,59 @@ void SymbolCollector::addDefinition(cons
   Symbols.insert(S);
 }
 
+/// Gets a canonical include (URI of the header or <header> or "header") for
+/// header of \p FID (which should usually be the *expansion* file).
+/// Returns None if includes should not be inserted for this file.
+llvm::Optional<std::string>
+SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) {
+  const SourceManager &SM = ASTCtx->getSourceManager();
+  const FileEntry *FE = SM.getFileEntryForID(FID);
+  if (!FE || FE->getName().empty())
+    return llvm::None;
+  llvm::StringRef Filename = FE->getName();
+  // If a file is mapped by canonical headers, use that mapping, regardless
+  // of whether it's an otherwise-good header (header guards etc).
+  if (Opts.Includes) {
+    llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName);
+    // If we had a mapping, always use it.
+    if (Canonical.startswith("<") || Canonical.startswith("\""))
+      return Canonical.str();
+    if (Canonical != Filename)
+      return toURI(SM, Canonical, Opts);
+  }
+  if (!isSelfContainedHeader(FID)) {
+    // A .inc or .def file is often included into a real header to define
+    // symbols (e.g. LLVM tablegen files).
+    if (Filename.endswith(".inc") || Filename.endswith(".def"))
+      return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID)));
+    // Conservatively refuse to insert #includes to files without guards.
+    return llvm::None;
+  }
+  // Standard case: just insert the file itself.
+  return toURI(SM, Filename, Opts);
+}
+
+bool SymbolCollector::isSelfContainedHeader(FileID FID) {
+  // The real computation (which will be memoized).
+  auto Compute = [&] {
+    const SourceManager &SM = ASTCtx->getSourceManager();
+    const FileEntry *FE = SM.getFileEntryForID(FID);
+    if (!FE)
+      return false;
+    if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE))
+      return false;
+    // This pattern indicates that a header can't be used without
+    // particular preprocessor state, usually set up by another header.
+    if (DontIncludeMePattern.match(SM.getBufferData(FID)))
+      return false;
+    return true;
+  };
+
+  auto R = HeaderIsSelfContainedCache.try_emplace(FID, false);
+  if (R.second)
+    R.first->second = Compute();
+  return R.first->second;
+}
+
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.h?rev=358605&r1=358604&r2=358605&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.h (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.h Wed Apr 17 11:33:07 2019
@@ -19,6 +19,7 @@
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Regex.h"
 #include <functional>
 
 namespace clang {
@@ -117,6 +118,15 @@ private:
                                bool IsMainFileSymbol);
   void addDefinition(const NamedDecl &, const Symbol &DeclSymbol);
 
+  llvm::Optional<std::string> getIncludeHeader(llvm::StringRef QName, FileID);
+  bool isSelfContainedHeader(FileID);
+  // Heuristic to detect headers that aren't self-contained, usually because
+  // they need to be included via an umbrella header. e.g. GTK matches this.
+  llvm::Regex DontIncludeMePattern = {
+      "^[ \t]*#[ \t]*if.*\n"         // An #if, #ifndef etc directive, then
+      "[ \t]*#[ \t]*error.*include", // an #error directive mentioning "include"
+      llvm::Regex::Newline};
+
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;
   // All refs collected from the AST.
@@ -141,6 +151,7 @@ private:
   llvm::DenseMap<const Decl *, const Decl *> CanonicalDecls;
   // Cache whether to index a file or not.
   llvm::DenseMap<FileID, bool> FilesToIndexCache;
+  llvm::DenseMap<FileID, bool> HeaderIsSelfContainedCache;
 };
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp?rev=358605&r1=358604&r2=358605&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Wed Apr 17 11:33:07 2019
@@ -272,8 +272,8 @@ public:
         Args, Factory->create(), Files.get(),
         std::make_shared<PCHContainerOperations>());
 
-    InMemoryFileSystem->addFile(TestHeaderName, 0,
-                                llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+    InMemoryFileSystem->addFile(
+        TestHeaderName, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode));
     InMemoryFileSystem->addFile(TestFileName, 0,
                                 llvm::MemoryBuffer::getMemBuffer(MainCode));
     Invocation.run();
@@ -1064,8 +1064,19 @@ TEST_F(SymbolCollectorTest, NonModularHe
   auto TU = TestTU::withHeaderCode("int x();");
   EXPECT_THAT(TU.headerSymbols(), ElementsAre(IncludeHeader()));
 
+  // Files missing include guards aren't eligible for insertion.
   TU.ImplicitHeaderGuard = false;
   EXPECT_THAT(TU.headerSymbols(), ElementsAre(Not(IncludeHeader())));
+
+  // We recognize some patterns of trying to prevent insertion.
+  TU = TestTU::withHeaderCode(R"cpp(
+#ifndef SECRET
+#error "This file isn't safe to include directly"
+#endif
+    int x();
+    )cpp");
+  TU.ExtraArgs.push_back("-DSECRET"); // *we're* able to include it.
+  EXPECT_THAT(TU.headerSymbols(), ElementsAre(Not(IncludeHeader())));
 }
 
 TEST_F(SymbolCollectorTest, AvoidUsingFwdDeclsAsCanonicalDecls) {




More information about the cfe-commits mailing list