[clang-tools-extra] fcde3d5 - [clangd] Patch PP directives to use stale preambles while building ASTs

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Fri May 29 03:47:54 PDT 2020


Author: Kadir Cetinkaya
Date: 2020-05-29T12:46:53+02:00
New Revision: fcde3d5b04b612ebc4164fe8f3e83f93cd1fce53

URL: https://github.com/llvm/llvm-project/commit/fcde3d5b04b612ebc4164fe8f3e83f93cd1fce53
DIFF: https://github.com/llvm/llvm-project/commit/fcde3d5b04b612ebc4164fe8f3e83f93cd1fce53.diff

LOG: [clangd] Patch PP directives to use stale preambles while building ASTs

Summary:
Depends on D79930.

This enables more accurate parsing of the AST, by making new macro
definitions in preamble section visible. This is handled by injecting
define directives into preamble patch.

This patch doesn't handle any location mappings yet, so features like go-to-def,
go-to-refs and hover might not work as expected. These will be addressed in a
follow-up patch.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79992

Added: 
    

Modified: 
    clang-tools-extra/clangd/Preamble.cpp
    clang-tools-extra/clangd/Preamble.h
    clang-tools-extra/clangd/unittests/PreambleTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp
index 959b241dac12..dcfafc914d10 100644
--- a/clang-tools-extra/clangd/Preamble.cpp
+++ b/clang-tools-extra/clangd/Preamble.cpp
@@ -15,6 +15,7 @@
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/FrontendActions.h"
@@ -106,14 +107,70 @@ class CppFilePreambleCallbacks : public PreambleCallbacks {
   const SourceManager *SourceMgr = nullptr;
 };
 
-/// Gets the includes in the preamble section of the file by running
-/// preprocessor over \p Contents. Returned includes do not contain resolved
-/// paths. \p VFS and \p Cmd is used to build the compiler invocation, which
-/// might stat/read files.
-llvm::Expected<std::vector<Inclusion>>
-scanPreambleIncludes(llvm::StringRef Contents,
-                     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
-                     const tooling::CompileCommand &Cmd) {
+// Represents directives other than includes, where basic textual information is
+// enough.
+struct TextualPPDirective {
+  unsigned DirectiveLine;
+  // Full text that's representing the directive, including the `#`.
+  std::string Text;
+
+  bool operator==(const TextualPPDirective &RHS) const {
+    return std::tie(DirectiveLine, Text) ==
+           std::tie(RHS.DirectiveLine, RHS.Text);
+  }
+};
+
+// Collects #define directives inside the main file.
+struct DirectiveCollector : public PPCallbacks {
+  DirectiveCollector(const Preprocessor &PP,
+                     std::vector<TextualPPDirective> &TextualDirectives)
+      : LangOpts(PP.getLangOpts()), SM(PP.getSourceManager()),
+        TextualDirectives(TextualDirectives) {}
+
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override {
+    InMainFile = SM.isWrittenInMainFile(Loc);
+  }
+
+  void MacroDefined(const Token &MacroNameTok,
+                    const MacroDirective *MD) override {
+    if (!InMainFile)
+      return;
+    TextualDirectives.emplace_back();
+    TextualPPDirective &TD = TextualDirectives.back();
+
+    auto DecompLoc = SM.getDecomposedLoc(MacroNameTok.getLocation());
+    TD.DirectiveLine = SM.getLineNumber(DecompLoc.first, DecompLoc.second);
+
+    SourceRange DefRange(
+        MD->getMacroInfo()->getDefinitionLoc(),
+        Lexer::getLocForEndOfToken(MD->getMacroInfo()->getDefinitionEndLoc(), 0,
+                                   SM, LangOpts));
+    llvm::raw_string_ostream OS(TD.Text);
+    OS << "#define " << toSourceCode(SM, DefRange);
+  }
+
+private:
+  bool InMainFile = true;
+  const LangOptions &LangOpts;
+  const SourceManager &SM;
+  std::vector<TextualPPDirective> &TextualDirectives;
+};
+
+struct ScannedPreamble {
+  std::vector<Inclusion> Includes;
+  std::vector<TextualPPDirective> TextualDirectives;
+};
+
+/// Scans the preprocessor directives in the preamble section of the file by
+/// running preprocessor over \p Contents. Returned includes do not contain
+/// resolved paths. \p VFS and \p Cmd is used to build the compiler invocation,
+/// which might stat/read files.
+llvm::Expected<ScannedPreamble>
+scanPreamble(llvm::StringRef Contents,
+             llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
+             const tooling::CompileCommand &Cmd) {
   // Build and run Preprocessor over the preamble.
   ParseInputs PI;
   PI.Contents = Contents.str();
@@ -147,14 +204,18 @@ scanPreambleIncludes(llvm::StringRef Contents,
   if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]))
     return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                    "failed BeginSourceFile");
+  const auto &SM = Clang->getSourceManager();
   Preprocessor &PP = Clang->getPreprocessor();
   IncludeStructure Includes;
+  PP.addPPCallbacks(collectIncludeStructureCallback(SM, &Includes));
+  ScannedPreamble SP;
   PP.addPPCallbacks(
-      collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
+      std::make_unique<DirectiveCollector>(PP, SP.TextualDirectives));
   if (llvm::Error Err = Action.Execute())
     return std::move(Err);
   Action.EndSourceFile();
-  return Includes.MainFileIncludes;
+  SP.Includes = std::move(Includes.MainFileIncludes);
+  return SP;
 }
 
 const char *spellingForIncDirective(tok::PPKeywordKind IncludeDirective) {
@@ -277,7 +338,7 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
                                     const ParseInputs &Modified,
                                     const PreambleData &Baseline) {
   assert(llvm::sys::path::is_absolute(FileName) && "relative FileName!");
-  // First scan the include directives in Baseline and Modified. These will be
+  // First scan preprocessor directives in Baseline and Modified. These will be
   // used to figure out newly added directives in Modified. Scanning can fail,
   // the code just bails out and creates an empty patch in such cases, as:
   // - If scanning for Baseline fails, no knowledge of existing includes hence
@@ -285,25 +346,28 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
   //   whole preamble, which is terribly slow.
   // - If scanning for Modified fails, cannot figure out newly added ones so
   //   there's nothing to do but generate an empty patch.
-  auto BaselineIncludes = scanPreambleIncludes(
+  auto BaselineScan = scanPreamble(
       // Contents needs to be null-terminated.
       Baseline.Preamble.getContents().str(),
       Baseline.StatCache->getConsumingFS(Modified.FS), Modified.CompileCommand);
-  if (!BaselineIncludes) {
-    elog("Failed to scan includes for baseline of {0}: {1}", FileName,
-         BaselineIncludes.takeError());
-    return {};
+  if (!BaselineScan) {
+    elog("Failed to scan baseline of {0}: {1}", FileName,
+         BaselineScan.takeError());
+    return PreamblePatch::unmodified(Baseline);
   }
-  auto ModifiedIncludes = scanPreambleIncludes(
+  auto ModifiedScan = scanPreamble(
       Modified.Contents, Baseline.StatCache->getConsumingFS(Modified.FS),
       Modified.CompileCommand);
-  if (!ModifiedIncludes) {
-    elog("Failed to scan includes for modified contents of {0}: {1}", FileName,
-         ModifiedIncludes.takeError());
-    return {};
+  if (!ModifiedScan) {
+    elog("Failed to scan modified contents of {0}: {1}", FileName,
+         ModifiedScan.takeError());
+    return PreamblePatch::unmodified(Baseline);
   }
-  // No patch needed if includes are equal.
-  if (*BaselineIncludes == *ModifiedIncludes)
+
+  bool IncludesChanged = BaselineScan->Includes != ModifiedScan->Includes;
+  bool DirectivesChanged =
+      BaselineScan->TextualDirectives != ModifiedScan->TextualDirectives;
+  if (!IncludesChanged && !DirectivesChanged)
     return PreamblePatch::unmodified(Baseline);
 
   PreamblePatch PP;
@@ -313,18 +377,6 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
                           "__preamble_patch__.h");
   PP.PatchFileName = PatchName.str().str();
 
-  // We are only interested in newly added includes, record the ones in Baseline
-  // for exclusion.
-  llvm::DenseMap<std::pair<tok::PPKeywordKind, llvm::StringRef>,
-                 /*Resolved=*/llvm::StringRef>
-      ExistingIncludes;
-  for (const auto &Inc : Baseline.Includes.MainFileIncludes)
-    ExistingIncludes[{Inc.Directive, Inc.Written}] = Inc.Resolved;
-  // There might be includes coming from disabled regions, record these for
-  // exclusion too. note that we don't have resolved paths for those.
-  for (const auto &Inc : *BaselineIncludes)
-    ExistingIncludes.try_emplace({Inc.Directive, Inc.Written});
-  // Calculate extra includes that needs to be inserted.
   llvm::raw_string_ostream Patch(PP.PatchContents);
   // Set default filename for subsequent #line directives
   Patch << "#line 0 \"";
@@ -332,25 +384,55 @@ PreamblePatch PreamblePatch::create(llvm::StringRef FileName,
   // might lead to problems on windows especially.
   escapeBackslashAndQuotes(FileName, Patch);
   Patch << "\"\n";
-  for (auto &Inc : *ModifiedIncludes) {
-    auto It = ExistingIncludes.find({Inc.Directive, Inc.Written});
-    // Include already present in the baseline preamble. Set resolved path and
-    // put into preamble includes.
-    if (It != ExistingIncludes.end()) {
-      Inc.Resolved = It->second.str();
-      PP.PreambleIncludes.push_back(Inc);
-      continue;
+
+  if (IncludesChanged) {
+    // We are only interested in newly added includes, record the ones in
+    // Baseline for exclusion.
+    llvm::DenseMap<std::pair<tok::PPKeywordKind, llvm::StringRef>,
+                   /*Resolved=*/llvm::StringRef>
+        ExistingIncludes;
+    for (const auto &Inc : Baseline.Includes.MainFileIncludes)
+      ExistingIncludes[{Inc.Directive, Inc.Written}] = Inc.Resolved;
+    // There might be includes coming from disabled regions, record these for
+    // exclusion too. note that we don't have resolved paths for those.
+    for (const auto &Inc : BaselineScan->Includes)
+      ExistingIncludes.try_emplace({Inc.Directive, Inc.Written});
+    // Calculate extra includes that needs to be inserted.
+    for (auto &Inc : ModifiedScan->Includes) {
+      auto It = ExistingIncludes.find({Inc.Directive, Inc.Written});
+      // Include already present in the baseline preamble. Set resolved path and
+      // put into preamble includes.
+      if (It != ExistingIncludes.end()) {
+        Inc.Resolved = It->second.str();
+        PP.PreambleIncludes.push_back(Inc);
+        continue;
+      }
+      // Include is new in the modified preamble. Inject it into the patch and
+      // use #line to set the presumed location to where it is spelled.
+      auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset);
+      Patch << llvm::formatv("#line {0}\n", LineCol.first);
+      Patch << llvm::formatv(
+          "#{0} {1}\n", spellingForIncDirective(Inc.Directive), Inc.Written);
     }
-    // Include is new in the modified preamble. Inject it into the patch and use
-    // #line to set the presumed location to where it is spelled.
-    auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset);
-    Patch << llvm::formatv("#line {0}\n", LineCol.first);
-    Patch << llvm::formatv("#{0} {1}\n", spellingForIncDirective(Inc.Directive),
-                           Inc.Written);
   }
-  Patch.flush();
 
-  // FIXME: Handle more directives, e.g. define/undef.
+  if (DirectivesChanged) {
+    // We need to patch all the directives, since they are order dependent. e.g:
+    // #define BAR(X) NEW(X) // Newly introduced in Modified
+    // #define BAR(X) OLD(X) // Exists in the Baseline
+    //
+    // If we've patched only the first directive, the macro definition would've
+    // been wrong for the rest of the file, since patch is applied after the
+    // baseline preamble.
+    //
+    // Note that we deliberately ignore conditional directives and undefs to
+    // reduce complexity. The former might cause problems because scanning is
+    // imprecise and might pick directives from disabled regions.
+    for (const auto &TD : ModifiedScan->TextualDirectives)
+      Patch << TD.Text << '\n';
+  }
+  dlog("Created preamble patch: {0}", Patch.str());
+  Patch.flush();
   return PP;
 }
 

diff  --git a/clang-tools-extra/clangd/Preamble.h b/clang-tools-extra/clangd/Preamble.h
index c88688a51736..f9816611153f 100644
--- a/clang-tools-extra/clangd/Preamble.h
+++ b/clang-tools-extra/clangd/Preamble.h
@@ -119,6 +119,9 @@ class PreamblePatch {
   /// using the presumed-location mechanism.
   std::vector<Inclusion> preambleIncludes() const;
 
+  /// Returns textual patch contents.
+  llvm::StringRef text() const { return PatchContents; }
+
 private:
   PreamblePatch() = default;
   std::string PatchContents;

diff  --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp
index b1548ef4732b..b69efe75beec 100644
--- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp
+++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp
@@ -26,7 +26,9 @@
 #include <string>
 #include <vector>
 
+using testing::Contains;
 using testing::Field;
+using testing::MatchesRegex;
 
 namespace clang {
 namespace clangd {
@@ -181,6 +183,109 @@ TEST(PreamblePatchTest, PatchesPreambleIncludes) {
               ElementsAre(AllOf(Field(&Inclusion::Written, "\"a.h\""),
                                 Field(&Inclusion::Resolved, testPath("a.h")))));
 }
+
+llvm::Optional<ParsedAST> createPatchedAST(llvm::StringRef Baseline,
+                                           llvm::StringRef Modified) {
+  auto BaselinePreamble = TestTU::withCode(Baseline).preamble();
+  if (!BaselinePreamble) {
+    ADD_FAILURE() << "Failed to build baseline preamble";
+    return llvm::None;
+  }
+
+  IgnoreDiagnostics Diags;
+  auto TU = TestTU::withCode(Modified);
+  auto CI = buildCompilerInvocation(TU.inputs(), Diags);
+  if (!CI) {
+    ADD_FAILURE() << "Failed to build compiler invocation";
+    return llvm::None;
+  }
+  return ParsedAST::build(testPath("main.cpp"), TU.inputs(), std::move(CI), {},
+                          BaselinePreamble);
+}
+
+std::string getPreamblePatch(llvm::StringRef Baseline,
+                             llvm::StringRef Modified) {
+  auto BaselinePreamble = TestTU::withCode(Baseline).preamble();
+  if (!BaselinePreamble) {
+    ADD_FAILURE() << "Failed to build baseline preamble";
+    return "";
+  }
+  auto TU = TestTU::withCode(Modified);
+  return PreamblePatch::create(testPath("main.cpp"), TU.inputs(),
+                               *BaselinePreamble)
+      .text()
+      .str();
+}
+
+TEST(PreamblePatchTest, Define) {
+  // BAR should be defined while parsing the AST.
+  struct {
+    llvm::StringLiteral Contents;
+    llvm::StringLiteral ExpectedPatch;
+  } Cases[] = {
+      {
+          R"cpp(
+        #define BAR
+        [[BAR]])cpp",
+          R"cpp(#line 0 ".*main.cpp"
+#define BAR
+)cpp",
+      },
+      // multiline macro
+      {
+          R"cpp(
+        #define BAR \
+
+        [[BAR]])cpp",
+          R"cpp(#line 0 ".*main.cpp"
+#define BAR
+)cpp",
+      },
+      // multiline macro
+      {
+          R"cpp(
+        #define \
+                BAR
+        [[BAR]])cpp",
+          R"cpp(#line 0 ".*main.cpp"
+#define BAR
+)cpp",
+      },
+  };
+
+  for (const auto &Case : Cases) {
+    SCOPED_TRACE(Case.Contents);
+    Annotations Modified(Case.Contents);
+    EXPECT_THAT(getPreamblePatch("", Modified.code()),
+                MatchesRegex(Case.ExpectedPatch.str()));
+
+    auto AST = createPatchedAST("", Modified.code());
+    ASSERT_TRUE(AST);
+    EXPECT_THAT(AST->getDiagnostics(),
+                Not(Contains(Field(&Diag::Range, Modified.range()))));
+  }
+}
+
+TEST(PreamblePatchTest, OrderingPreserved) {
+  llvm::StringLiteral Baseline = "#define BAR(X) X";
+  Annotations Modified(R"cpp(
+    #define BAR(X, Y) X Y
+    #define BAR(X) X
+    [[BAR]](int y);
+  )cpp");
+
+  llvm::StringLiteral ExpectedPatch(R"cpp(#line 0 ".*main.cpp"
+#define BAR\(X, Y\) X Y
+#define BAR\(X\) X
+)cpp");
+  EXPECT_THAT(getPreamblePatch(Baseline, Modified.code()),
+              MatchesRegex(ExpectedPatch.str()));
+
+  auto AST = createPatchedAST(Baseline, Modified.code());
+  ASSERT_TRUE(AST);
+  EXPECT_THAT(AST->getDiagnostics(),
+              Not(Contains(Field(&Diag::Range, Modified.range()))));
+}
 } // namespace
 } // namespace clangd
 } // namespace clang


        


More information about the cfe-commits mailing list