[clang-tools-extra] cd0ca5a - [clangd] Record information about non self-contained headers in IncludeStructure

Kirill Bobyrev via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 26 05:13:13 PST 2021


Author: Kirill Bobyrev
Date: 2021-11-26T14:12:54+01:00
New Revision: cd0ca5a0eaa1b75b445e82753ea093bbb8e7e85c

URL: https://github.com/llvm/llvm-project/commit/cd0ca5a0eaa1b75b445e82753ea093bbb8e7e85c
DIFF: https://github.com/llvm/llvm-project/commit/cd0ca5a0eaa1b75b445e82753ea093bbb8e7e85c.diff

LOG: [clangd] Record information about non self-contained headers in IncludeStructure

This will be useful for IncludeCleaner.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D114370

Added: 
    

Modified: 
    clang-tools-extra/clangd/CodeComplete.cpp
    clang-tools-extra/clangd/Headers.cpp
    clang-tools-extra/clangd/Headers.h
    clang-tools-extra/clangd/ParsedAST.cpp
    clang-tools-extra/clangd/Preamble.cpp
    clang-tools-extra/clangd/SourceCode.cpp
    clang-tools-extra/clangd/SourceCode.h
    clang-tools-extra/clangd/index/SymbolCollector.cpp
    clang-tools-extra/clangd/unittests/HeadersTests.cpp
    clang-tools-extra/clangd/unittests/PreambleTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index ac6c3b077d47..63111c264c73 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -1272,8 +1272,7 @@ bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
   // Force them to be deserialized so SemaCodeComplete sees them.
   loadMainFilePreambleMacros(Clang->getPreprocessor(), Input.Preamble);
   if (Includes)
-    Clang->getPreprocessor().addPPCallbacks(
-        Includes->collect(Clang->getSourceManager()));
+    Clang->getPreprocessor().addPPCallbacks(Includes->collect(*Clang));
   if (llvm::Error Err = Action.Execute()) {
     log("Execute() failed when running codeComplete for {0}: {1}",
         Input.FileName, toString(std::move(Err)));

diff  --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp
index 9f5ab244aa63..dc113068ef3a 100644
--- a/clang-tools-extra/clangd/Headers.cpp
+++ b/clang-tools-extra/clangd/Headers.cpp
@@ -22,12 +22,12 @@
 
 namespace clang {
 namespace clangd {
-namespace {
 
-class RecordHeaders : public PPCallbacks {
+class IncludeStructure::RecordHeaders : public PPCallbacks {
 public:
-  RecordHeaders(const SourceManager &SM, IncludeStructure *Out)
-      : SM(SM), Out(Out) {}
+  RecordHeaders(const SourceManager &SM, HeaderSearch &HeaderInfo,
+                IncludeStructure *Out)
+      : SM(SM), HeaderInfo(HeaderInfo), Out(Out) {}
 
   // Record existing #includes - both written and resolved paths. Only #includes
   // in the main file are collected.
@@ -85,10 +85,17 @@ class RecordHeaders : public PPCallbacks {
         InBuiltinFile = true;
       }
       break;
-    case PPCallbacks::ExitFile:
+    case PPCallbacks::ExitFile: {
       if (PrevFID == BuiltinFile)
         InBuiltinFile = false;
+      // At file exit time HeaderSearchInfo is valid and can be used to
+      // determine whether the file was a self-contained header or not.
+      if (const FileEntry *FE = SM.getFileEntryForID(PrevFID))
+        if (!isSelfContainedHeader(FE, PrevFID, SM, HeaderInfo))
+          Out->NonSelfContained.insert(
+              *Out->getID(SM.getFileEntryForID(PrevFID)));
       break;
+    }
     case PPCallbacks::RenameFile:
     case PPCallbacks::SystemHeaderPragma:
       break;
@@ -97,6 +104,7 @@ class RecordHeaders : public PPCallbacks {
 
 private:
   const SourceManager &SM;
+  HeaderSearch &HeaderInfo;
   // Set after entering the <built-in> file.
   FileID BuiltinFile;
   // Indicates whether <built-in> file is part of include stack.
@@ -105,8 +113,6 @@ class RecordHeaders : public PPCallbacks {
   IncludeStructure *Out;
 };
 
-} // namespace
-
 bool isLiteralInclude(llvm::StringRef Include) {
   return Include.startswith("<") || Include.startswith("\"");
 }
@@ -152,9 +158,11 @@ llvm::SmallVector<llvm::StringRef, 1> getRankedIncludes(const Symbol &Sym) {
 }
 
 std::unique_ptr<PPCallbacks>
-IncludeStructure::collect(const SourceManager &SM) {
+IncludeStructure::collect(const CompilerInstance &CI) {
+  auto &SM = CI.getSourceManager();
   MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
-  return std::make_unique<RecordHeaders>(SM, this);
+  return std::make_unique<RecordHeaders>(
+      SM, CI.getPreprocessor().getHeaderSearchInfo(), this);
 }
 
 llvm::Optional<IncludeStructure::HeaderID>

diff  --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
index 7b42598b819c..b6c67478568f 100644
--- a/clang-tools-extra/clangd/Headers.h
+++ b/clang-tools-extra/clangd/Headers.h
@@ -17,10 +17,12 @@
 #include "clang/Basic/FileEntry.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Format/Format.h"
+#include "clang/Frontend/CompilerInstance.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Error.h"
@@ -123,7 +125,7 @@ class IncludeStructure {
 
   // Returns a PPCallback that visits all inclusions in the main file and
   // populates the structure.
-  std::unique_ptr<PPCallbacks> collect(const SourceManager &SM);
+  std::unique_ptr<PPCallbacks> collect(const CompilerInstance &CI);
 
   // HeaderID identifies file in the include graph. It corresponds to a
   // FileEntry rather than a FileID, but stays stable across preamble & main
@@ -138,6 +140,10 @@ class IncludeStructure {
     return RealPathNames[static_cast<unsigned>(ID)];
   }
 
+  bool isSelfContained(HeaderID ID) const {
+    return !NonSelfContained.contains(ID);
+  }
+
   // Return all transitively reachable files.
   llvm::ArrayRef<std::string> allHeaders() const { return RealPathNames; }
 
@@ -158,6 +164,8 @@ class IncludeStructure {
   // content of the main file changes.
   static const HeaderID MainFileID = HeaderID(0u);
 
+  class RecordHeaders;
+
 private:
   // MainFileEntry will be used to check if the queried file is the main file
   // or not.
@@ -170,6 +178,9 @@ class IncludeStructure {
   // and RealPathName and UniqueID are not preserved in
   // the preamble.
   llvm::DenseMap<llvm::sys::fs::UniqueID, HeaderID> UIDToIndex;
+  // Contains HeaderIDs of all non self-contained entries in the
+  // IncludeStructure.
+  llvm::DenseSet<HeaderID> NonSelfContained;
 };
 
 // Calculates insertion edit for including a new header in a file.

diff  --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 719c374d8b85..5239bc3d185a 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -446,8 +446,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
   // Important: collectIncludeStructure is registered *after* ReplayPreamble!
   // Otherwise we would collect the replayed includes again...
   // (We can't *just* use the replayed includes, they don't have Resolved path).
-  Clang->getPreprocessor().addPPCallbacks(
-      Includes.collect(Clang->getSourceManager()));
+  Clang->getPreprocessor().addPPCallbacks(Includes.collect(*Clang));
   // Copy over the macros in the preamble region of the main file, and combine
   // with non-preamble macros below.
   MainFileMacros Macros;

diff  --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp
index 8b80ee1bc265..79165e392df6 100644
--- a/clang-tools-extra/clangd/Preamble.cpp
+++ b/clang-tools-extra/clangd/Preamble.cpp
@@ -23,6 +23,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/FrontendActions.h"
+#include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
@@ -98,6 +99,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks {
     CanonIncludes.addSystemHeadersMapping(CI.getLangOpts());
     LangOpts = &CI.getLangOpts();
     SourceMgr = &CI.getSourceManager();
+    Compiler = &CI;
   }
 
   std::unique_ptr<PPCallbacks> createPPCallbacks() override {
@@ -105,7 +107,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks {
            "SourceMgr and LangOpts must be set at this point");
 
     return std::make_unique<PPChainedCallbacks>(
-        Includes.collect(*SourceMgr),
+        Includes.collect(*Compiler),
         std::make_unique<PPChainedCallbacks>(
             std::make_unique<CollectMainFileMacros>(*SourceMgr, Macros),
             collectPragmaMarksCallback(*SourceMgr, Marks)));
@@ -140,6 +142,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks {
   std::unique_ptr<CommentHandler> IWYUHandler = nullptr;
   const clang::LangOptions *LangOpts = nullptr;
   const SourceManager *SourceMgr = nullptr;
+  const CompilerInstance *Compiler = nullptr;
 };
 
 // Represents directives other than includes, where basic textual information is
@@ -283,10 +286,9 @@ scanPreamble(llvm::StringRef Contents, const tooling::CompileCommand &Cmd) {
   PreprocessOnlyAction Action;
   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
     return error("failed BeginSourceFile");
-  const auto &SM = Clang->getSourceManager();
   Preprocessor &PP = Clang->getPreprocessor();
   IncludeStructure Includes;
-  PP.addPPCallbacks(Includes.collect(SM));
+  PP.addPPCallbacks(Includes.collect(*Clang));
   ScannedPreamble SP;
   SP.Bounds = Bounds;
   PP.addPPCallbacks(

diff  --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 16072f3803c5..f3a60d641951 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -1182,5 +1182,58 @@ bool isProtoFile(SourceLocation Loc, const SourceManager &SM) {
   return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT);
 }
 
+namespace {
+
+// Is Line an #if or #ifdef directive?
+// FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non
+// self-contained and is probably not what we want.
+bool isIf(llvm::StringRef Line) {
+  Line = Line.ltrim();
+  if (!Line.consume_front("#"))
+    return false;
+  Line = Line.ltrim();
+  return Line.startswith("if");
+}
+
+// Is Line an #error directive mentioning includes?
+bool isErrorAboutInclude(llvm::StringRef Line) {
+  Line = Line.ltrim();
+  if (!Line.consume_front("#"))
+    return false;
+  Line = Line.ltrim();
+  if (!Line.startswith("error"))
+    return false;
+  return Line.contains_insensitive(
+      "includ"); // Matches "include" or "including".
+}
+
+// Heuristically headers that only want to be included via an umbrella.
+bool isDontIncludeMeHeader(llvm::StringRef Content) {
+  llvm::StringRef Line;
+  // Only sniff up to 100 lines or 10KB.
+  Content = Content.take_front(100 * 100);
+  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
+    std::tie(Line, Content) = Content.split('\n');
+    if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
+      return true;
+  }
+  return false;
+}
+
+} // namespace
+
+bool isSelfContainedHeader(const FileEntry *FE, FileID FID,
+                           const SourceManager &SM, HeaderSearch &HeaderInfo) {
+  // FIXME: Should files that have been #import'd be considered
+  // self-contained? That's really a property of the includer,
+  // not of the file.
+  if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) &&
+      !HeaderInfo.hasFileBeenImported(FE))
+    return false;
+  // This pattern indicates that a header can't be used without
+  // particular preprocessor state, usually set up by another header.
+  return !isDontIncludeMeHeader(SM.getBufferData(FID));
+}
+
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h
index 459c943b5712..315d79a932df 100644
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@@ -21,6 +21,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Format/Format.h"
+#include "clang/Lex/HeaderSearch.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/StringRef.h"
@@ -324,6 +325,11 @@ bool isHeaderFile(llvm::StringRef FileName,
 /// Returns true if the given location is in a generated protobuf file.
 bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
 
+/// This scans source code, and should not be called when using a preamble.
+/// Prefer to access the cache in IncludeStructure::isSelfContained if you can.
+bool isSelfContainedHeader(const FileEntry *FE, FileID ID,
+                           const SourceManager &SM, HeaderSearch &HeaderInfo);
+
 } // namespace clangd
 } // namespace clang
 #endif

diff  --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index dfcd4cb50a13..5171e79f3072 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -266,7 +266,8 @@ class SymbolCollector::HeaderFileURICache {
         return toURI(Canonical);
       }
     }
-    if (!isSelfContainedHeader(FID, FE)) {
+    if (!isSelfContainedHeader(FE, FID, PP->getSourceManager(),
+                               PP->getHeaderSearchInfo())) {
       // A .inc or .def file is often included into a real header to define
       // symbols (e.g. LLVM tablegen files).
       if (Filename.endswith(".inc") || Filename.endswith(".def"))
@@ -278,54 +279,6 @@ class SymbolCollector::HeaderFileURICache {
     // Standard case: just insert the file itself.
     return toURI(FE);
   }
-
-  bool isSelfContainedHeader(FileID FID, const FileEntry *FE) {
-    // FIXME: Should files that have been #import'd be considered
-    // self-contained? That's really a property of the includer,
-    // not of the file.
-    if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE) &&
-        !PP->getHeaderSearchInfo().hasFileBeenImported(FE))
-      return false;
-    // This pattern indicates that a header can't be used without
-    // particular preprocessor state, usually set up by another header.
-    if (isDontIncludeMeHeader(SM.getBufferData(FID)))
-      return false;
-    return true;
-  }
-
-  // Is Line an #if or #ifdef directive?
-  static bool isIf(llvm::StringRef Line) {
-    Line = Line.ltrim();
-    if (!Line.consume_front("#"))
-      return false;
-    Line = Line.ltrim();
-    return Line.startswith("if");
-  }
-
-  // Is Line an #error directive mentioning includes?
-  static bool isErrorAboutInclude(llvm::StringRef Line) {
-    Line = Line.ltrim();
-    if (!Line.consume_front("#"))
-      return false;
-    Line = Line.ltrim();
-    if (!Line.startswith("error"))
-      return false;
-    return Line.contains_insensitive(
-        "includ"); // Matches "include" or "including".
-  }
-
-  // Heuristically headers that only want to be included via an umbrella.
-  static bool isDontIncludeMeHeader(llvm::StringRef Content) {
-    llvm::StringRef Line;
-    // Only sniff up to 100 lines or 10KB.
-    Content = Content.take_front(100 * 100);
-    for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
-      std::tie(Line, Content) = Content.split('\n');
-      if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
-        return true;
-    }
-    return false;
-  }
 };
 
 // Return the symbol location of the token at \p TokLoc.

diff  --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
index 85c3e88ae05b..b9e1d2c6974c 100644
--- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -80,8 +80,7 @@ class HeadersTest : public ::testing::Test {
     EXPECT_TRUE(
         Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]));
     IncludeStructure Includes;
-    Clang->getPreprocessor().addPPCallbacks(
-        Includes.collect(Clang->getSourceManager()));
+    Clang->getPreprocessor().addPPCallbacks(Includes.collect(*Clang));
     EXPECT_FALSE(Action.Execute());
     Action.EndSourceFile();
     return Includes;
@@ -363,6 +362,39 @@ TEST_F(HeadersTest, PresumedLocations) {
   EXPECT_THAT(collectIncludes().MainFileIncludes,
               Contains(AllOf(IncludeLine(2), Written("<a.h>"))));
 }
+
+TEST_F(HeadersTest, SelfContainedHeaders) {
+  // Including through non-builtin file has no effects.
+  FS.Files[MainFile] = R"cpp(
+#include "includeguarded.h"
+#include "nonguarded.h"
+#include "pp_depend.h"
+#include "pragmaguarded.h"
+)cpp";
+  FS.Files["pragmaguarded.h"] = R"cpp(
+#pragma once
+)cpp";
+  FS.Files["includeguarded.h"] = R"cpp(
+#ifndef INCLUDE_GUARDED_H
+#define INCLUDE_GUARDED_H
+void foo();
+#endif // INCLUDE_GUARDED_H
+)cpp";
+  FS.Files["nonguarded.h"] = R"cpp(
+)cpp";
+  FS.Files["pp_depend.h"] = R"cpp(
+  #ifndef REQUIRED_PP_DIRECTIVE
+  # error You have to have PP directive set to include this one!
+  #endif
+)cpp";
+
+  auto Includes = collectIncludes();
+  EXPECT_TRUE(Includes.isSelfContained(getID("pragmaguarded.h", Includes)));
+  EXPECT_TRUE(Includes.isSelfContained(getID("includeguarded.h", Includes)));
+  EXPECT_FALSE(Includes.isSelfContained(getID("nonguarded.h", Includes)));
+  EXPECT_FALSE(Includes.isSelfContained(getID("pp_depend.h", Includes)));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp
index 3d021979d179..2e3dec96cb22 100644
--- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp
+++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp
@@ -82,8 +82,7 @@ collectPatchedIncludes(llvm::StringRef ModifiedContents,
     return {};
   }
   IncludeStructure Includes;
-  Clang->getPreprocessor().addPPCallbacks(
-      Includes.collect(Clang->getSourceManager()));
+  Clang->getPreprocessor().addPPCallbacks(Includes.collect(*Clang));
   if (llvm::Error Err = Action.Execute()) {
     ADD_FAILURE() << "failed to execute action: " << std::move(Err);
     return {};


        


More information about the cfe-commits mailing list