[clang-tools-extra] ec7c8ba - [clangd] Make use of syntax tokens in ReplayPreamble

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 4 02:01:50 PST 2020


Author: Kadir Cetinkaya
Date: 2020-03-04T11:01:35+01:00
New Revision: ec7c8bae84557a987d8dce701e8d1ef9183f6cf3

URL: https://github.com/llvm/llvm-project/commit/ec7c8bae84557a987d8dce701e8d1ef9183f6cf3
DIFF: https://github.com/llvm/llvm-project/commit/ec7c8bae84557a987d8dce701e8d1ef9183f6cf3.diff

LOG: [clangd] Make use of syntax tokens in ReplayPreamble

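Summary: Replace usage of RawLexer with syntax tokens inside ReplayPreamble. The preamble section of the main file is now tokenized once, and the tokens of each #include directive are looked up in that token list instead of being re-lexed per include.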

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74842

Added: 
    

Modified: 
    clang-tools-extra/clangd/ParsedAST.cpp
    clang-tools-extra/clangd/unittests/ParsedASTTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index e43c2ce66261..b18c9af077c6 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -114,16 +114,16 @@ class ReplayPreamble : private PPCallbacks {
   // Attach preprocessor hooks such that preamble events will be injected at
   // the appropriate time.
   // Events will be delivered to the *currently registered* PP callbacks.
-  static void attach(const IncludeStructure &Includes,
-                     CompilerInstance &Clang) {
+  static void attach(const IncludeStructure &Includes, CompilerInstance &Clang,
+                     const PreambleBounds &PB) {
     auto &PP = Clang.getPreprocessor();
     auto *ExistingCallbacks = PP.getPPCallbacks();
     // No need to replay events if nobody is listening.
     if (!ExistingCallbacks)
       return;
-    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(
-        new ReplayPreamble(Includes, ExistingCallbacks,
-                           Clang.getSourceManager(), PP, Clang.getLangOpts())));
+    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(new ReplayPreamble(
+        Includes, ExistingCallbacks, Clang.getSourceManager(), PP,
+        Clang.getLangOpts(), PB)));
     // We're relying on the fact that addPPCallbacks keeps the old PPCallbacks
     // around, creating a chaining wrapper. Guard against other implementations.
     assert(PP.getPPCallbacks() != ExistingCallbacks &&
@@ -133,9 +133,13 @@ class ReplayPreamble : private PPCallbacks {
 private:
   ReplayPreamble(const IncludeStructure &Includes, PPCallbacks *Delegate,
                  const SourceManager &SM, Preprocessor &PP,
-                 const LangOptions &LangOpts)
-      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP),
-        LangOpts(LangOpts) {}
+                 const LangOptions &LangOpts, const PreambleBounds &PB)
+      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP) {
+    // Only tokenize the preamble section of the main file, as we are not
+    // interested in the rest of the tokens.
+    MainFileTokens = syntax::tokenize(
+        syntax::FileRange(SM.getMainFileID(), 0, PB.Size), SM, LangOpts);
+  }
 
   // In a normal compile, the preamble traverses the following structure:
   //
@@ -167,33 +171,53 @@ class ReplayPreamble : private PPCallbacks {
         if (auto FE = SM.getFileManager().getFile(Inc.Resolved))
           File = *FE;
 
+      // Re-lex the #include directive to find its interesting parts.
+      auto HashLoc = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
+      auto HashTok = llvm::partition_point(MainFileTokens,
+                                           [&HashLoc](const syntax::Token &T) {
+                                             return T.location() < HashLoc;
+                                           });
+      assert(HashTok != MainFileTokens.end() && HashTok->kind() == tok::hash);
+
+      auto IncludeTok = std::next(HashTok);
+      assert(IncludeTok != MainFileTokens.end());
+
+      auto FileTok = std::next(IncludeTok);
+      assert(FileTok != MainFileTokens.end());
+
+      // Create a fake import/include token, none of the callers seem to care
+      // about clang::Token::Flags.
+      Token SynthesizedIncludeTok;
+      SynthesizedIncludeTok.startToken();
+      SynthesizedIncludeTok.setLocation(IncludeTok->location());
+      SynthesizedIncludeTok.setLength(IncludeTok->length());
+      SynthesizedIncludeTok.setKind(tok::raw_identifier);
+      SynthesizedIncludeTok.setRawIdentifierData(IncludeTok->text(SM).data());
+      PP.LookUpIdentifierInfo(SynthesizedIncludeTok);
+
+      // Same here, create a fake one for Filename, including angles or quotes.
+      Token SynthesizedFilenameTok;
+      SynthesizedFilenameTok.startToken();
+      SynthesizedFilenameTok.setLocation(FileTok->location());
+      // Note that we can't make use of FileTok->length/text in here as in the
+      // case of angled includes this will contain tok::less instead of
+      // filename. Whereas Inc.Written contains the full header name including
+      // quotes/angles.
+      SynthesizedFilenameTok.setLength(Inc.Written.length());
+      SynthesizedFilenameTok.setKind(tok::header_name);
+      SynthesizedFilenameTok.setLiteralData(Inc.Written.data());
+
       llvm::StringRef WrittenFilename =
           llvm::StringRef(Inc.Written).drop_front().drop_back();
-      bool Angled = llvm::StringRef(Inc.Written).startswith("<");
-
-      // Re-lex the #include directive to find its interesting parts.
-      llvm::StringRef Src = SM.getBufferData(SM.getMainFileID());
-      Lexer RawLexer(SM.getLocForStartOfFile(SM.getMainFileID()), LangOpts,
-                     Src.begin(), Src.begin() + Inc.HashOffset, Src.end());
-      Token HashTok, IncludeTok, FilenameTok;
-      RawLexer.LexFromRawLexer(HashTok);
-      assert(HashTok.getKind() == tok::hash);
-      RawLexer.setParsingPreprocessorDirective(true);
-      RawLexer.LexFromRawLexer(IncludeTok);
-      IdentifierInfo *II = PP.getIdentifierInfo(IncludeTok.getRawIdentifier());
-      IncludeTok.setIdentifierInfo(II);
-      IncludeTok.setKind(II->getTokenID());
-      RawLexer.LexIncludeFilename(FilenameTok);
-
-      Delegate->InclusionDirective(
-          HashTok.getLocation(), IncludeTok, WrittenFilename, Angled,
-          CharSourceRange::getCharRange(FilenameTok.getLocation(),
-                                        FilenameTok.getEndLoc()),
-          File, "SearchPath", "RelPath", /*Imported=*/nullptr, Inc.FileKind);
+      Delegate->InclusionDirective(HashTok->location(), SynthesizedIncludeTok,
+                                   WrittenFilename, Inc.Written.front() == '<',
+                                   FileTok->range(SM).toCharRange(SM), File,
+                                   "SearchPath", "RelPath",
+                                   /*Imported=*/nullptr, Inc.FileKind);
       if (File)
         // FIXME: Use correctly named FileEntryRef.
-        Delegate->FileSkipped(FileEntryRef(File->getName(), *File), FilenameTok,
-                              Inc.FileKind);
+        Delegate->FileSkipped(FileEntryRef(File->getName(), *File),
+                              SynthesizedFilenameTok, Inc.FileKind);
       else {
         llvm::SmallString<1> UnusedRecovery;
         Delegate->FileNotFound(WrittenFilename, UnusedRecovery);
@@ -205,7 +229,7 @@ class ReplayPreamble : private PPCallbacks {
   PPCallbacks *Delegate;
   const SourceManager &SM;
   Preprocessor &PP;
-  const LangOptions &LangOpts;
+  std::vector<syntax::Token> MainFileTokens;
 };
 
 } // namespace
@@ -337,7 +361,7 @@ ParsedAST::build(std::unique_ptr<clang::CompilerInvocation> CI,
   auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
   // Replay the preamble includes so that clang-tidy checks can see them.
   if (Preamble)
-    ReplayPreamble::attach(Includes, *Clang);
+    ReplayPreamble::attach(Includes, *Clang, Preamble->Preamble.getBounds());
   // Important: collectIncludeStructure is registered *after* ReplayPreamble!
   // Otherwise we would collect the replayed includes again...
   // (We can't *just* use the replayed includes, they don't have Resolved path).

diff  --git a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
index a19a88b66888..bc14bc95416e 100644
--- a/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../../clang-tidy/ClangTidyModule.h"
+#include "../../clang-tidy/ClangTidyModuleRegistry.h"
 #include "AST.h"
 #include "Annotations.h"
 #include "Compiler.h"
@@ -20,8 +22,13 @@
 #include "TestFS.h"
 #include "TestTU.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Token.h"
 #include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock-matchers.h"
 #include "gmock/gmock.h"
@@ -71,6 +78,10 @@ MATCHER_P(WithTemplateArgs, ArgName, "") {
   return false;
 }
 
+MATCHER_P(RangeIs, R, "") {
+  return arg.beginOffset() == R.Begin && arg.endOffset() == R.End;
+}
+
 TEST(ParsedASTTest, TopLevelDecls) {
   TestTU TU;
   TU.HeaderCode = R"(
@@ -296,6 +307,116 @@ TEST(ParsedASTTest, CollectsMainFileMacroExpansions) {
               testing::UnorderedElementsAreArray(TestCase.points()));
 }
 
+TEST(ParsedASTTest, ReplayPreambleForTidyCheckers) {
+  struct Inclusion {
+    Inclusion(const SourceManager &SM, SourceLocation HashLoc,
+              const Token &IncludeTok, llvm::StringRef FileName, bool IsAngled,
+              CharSourceRange FilenameRange)
+        : HashOffset(SM.getDecomposedLoc(HashLoc).second), IncTok(IncludeTok),
+          IncDirective(IncludeTok.getIdentifierInfo()->getName()),
+          FileNameOffset(SM.getDecomposedLoc(FilenameRange.getBegin()).second),
+          FileName(FileName), IsAngled(IsAngled) {}
+    size_t HashOffset;
+    syntax::Token IncTok;
+    llvm::StringRef IncDirective;
+    size_t FileNameOffset;
+    llvm::StringRef FileName;
+    bool IsAngled;
+  };
+  static std::vector<Inclusion> Includes;
+  static std::vector<syntax::Token> SkippedFiles;
+  struct ReplayPreamblePPCallback : public PPCallbacks {
+    const SourceManager &SM;
+    explicit ReplayPreamblePPCallback(const SourceManager &SM) : SM(SM) {}
+
+    void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                            StringRef FileName, bool IsAngled,
+                            CharSourceRange FilenameRange, const FileEntry *,
+                            StringRef, StringRef, const Module *,
+                            SrcMgr::CharacteristicKind) override {
+      Includes.emplace_back(SM, HashLoc, IncludeTok, FileName, IsAngled,
+                            FilenameRange);
+    }
+
+    void FileSkipped(const FileEntryRef &, const Token &FilenameTok,
+                     SrcMgr::CharacteristicKind) override {
+      SkippedFiles.emplace_back(FilenameTok);
+    }
+  };
+  struct ReplayPreambleCheck : public tidy::ClangTidyCheck {
+    ReplayPreambleCheck(StringRef Name, tidy::ClangTidyContext *Context)
+        : ClangTidyCheck(Name, Context) {}
+    void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
+                             Preprocessor *ModuleExpanderPP) override {
+      PP->addPPCallbacks(::std::make_unique<ReplayPreamblePPCallback>(SM));
+    }
+  };
+  struct ReplayPreambleModule : public tidy::ClangTidyModule {
+    void
+    addCheckFactories(tidy::ClangTidyCheckFactories &CheckFactories) override {
+      CheckFactories.registerCheck<ReplayPreambleCheck>(
+          "replay-preamble-check");
+    }
+  };
+
+  static tidy::ClangTidyModuleRegistry::Add<ReplayPreambleModule> X(
+      "replay-preamble-module", "");
+  TestTU TU;
+  // This check records inclusion directives replayed by clangd.
+  TU.ClangTidyChecks = "replay-preamble-check";
+  llvm::Annotations Test(R"cpp(
+    $hash^#$include[[import]] $filebegin^"$filerange[[bar.h]]"
+    $hash^#$include[[include_next]] $filebegin^"$filerange[[baz.h]]"
+    $hash^#$include[[include]] $filebegin^<$filerange[[a.h]]>)cpp");
+  llvm::StringRef Code = Test.code();
+  TU.Code = Code.str();
+  TU.AdditionalFiles["bar.h"] = "";
+  TU.AdditionalFiles["baz.h"] = "";
+  TU.AdditionalFiles["a.h"] = "";
+  TU.ExtraArgs = {"-isystem."};
+
+  const auto &AST = TU.build();
+  const auto &SM = AST.getSourceManager();
+
+  auto HashLocs = Test.points("hash");
+  ASSERT_EQ(HashLocs.size(), Includes.size());
+  auto IncludeRanges = Test.ranges("include");
+  ASSERT_EQ(IncludeRanges.size(), Includes.size());
+  auto FileBeginLocs = Test.points("filebegin");
+  ASSERT_EQ(FileBeginLocs.size(), Includes.size());
+  auto FileRanges = Test.ranges("filerange");
+  ASSERT_EQ(FileRanges.size(), Includes.size());
+
+  ASSERT_EQ(SkippedFiles.size(), Includes.size());
+  for (size_t I = 0; I < Includes.size(); ++I) {
+    const auto &Inc = Includes[I];
+
+    EXPECT_EQ(Inc.HashOffset, HashLocs[I]);
+
+    auto IncRange = IncludeRanges[I];
+    EXPECT_THAT(Inc.IncTok.range(SM), RangeIs(IncRange));
+    EXPECT_EQ(Inc.IncTok.kind(), tok::identifier);
+    EXPECT_EQ(Inc.IncDirective,
+              Code.substr(IncRange.Begin, IncRange.End - IncRange.Begin));
+
+    EXPECT_EQ(Inc.FileNameOffset, FileBeginLocs[I]);
+    EXPECT_EQ(Inc.IsAngled, Code[FileBeginLocs[I]] == '<');
+
+    auto FileRange = FileRanges[I];
+    EXPECT_EQ(Inc.FileName,
+              Code.substr(FileRange.Begin, FileRange.End - FileRange.Begin));
+
+    EXPECT_EQ(SM.getDecomposedLoc(SkippedFiles[I].location()).second,
+              Inc.FileNameOffset);
+    // This also contains quotes/angles so increment the range by one from both
+    // sides.
+    EXPECT_EQ(
+        SkippedFiles[I].text(SM),
+        Code.substr(FileRange.Begin - 1, FileRange.End - FileRange.Begin + 2));
+    EXPECT_EQ(SkippedFiles[I].kind(), tok::header_name);
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang


        


More information about the cfe-commits mailing list