[clang-tools-extra] r367453 - [clangd] Ignore semicolons, whitespace, and comments in SelectionTree.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 31 10:52:40 PDT 2019


Author: sammccall
Date: Wed Jul 31 10:52:40 2019
New Revision: 367453

URL: http://llvm.org/viewvc/llvm-project?rev=367453&view=rev
Log:
[clangd] Ignore semicolons, whitespace, and comments in SelectionTree.

Summary:
Whitespace and comments are a clear bugfix: selecting some
comments/space near a statement doesn't mean you're selecting the
surrounding block.

Semicolons are less obvious, but for similar reasons: these tokens
aren't actually claimed by any AST node (usually), so an AST-based model
like SelectionTree shouldn't take them into account.

Callers may still sometimes care about semis of course:
 - when the selection is an expr with a non-expr parent, selection of
   the semicolon indicates intent to select the statement.
 - when a statement with a trailing semi is selected, we need to know
   its range to ensure it can be removed.
SelectionTree may or may not play a role here, but these are separate questions
from its core function of describing which AST nodes were selected.

The mechanism here is the TokenBuffer from syntax-trees. We use it in a
fairly low-level way (just to get boundaries of raw spelled tokens). The
actual mapping of AST nodes to coordinates continues to use the (fairly
mature) SourceLocation based logic. TokenBuffer/Syntax trees
don't currently offer an alternative to getFileRange(), I think.

Reviewers: SureYeaah, kadircet

Subscribers: MaskRay, jkorous, arphaman, cfe-commits, ilya-biryukov

Tags: #clang

Differential Revision: https://reviews.llvm.org/D65486

Modified:
    clang-tools-extra/trunk/clangd/Selection.cpp
    clang-tools-extra/trunk/clangd/Selection.h
    clang-tools-extra/trunk/clangd/refactor/Tweak.cpp
    clang-tools-extra/trunk/clangd/unittests/SelectionTests.cpp

Modified: clang-tools-extra/trunk/clangd/Selection.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Selection.cpp?rev=367453&r1=367452&r2=367453&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Selection.cpp (original)
+++ clang-tools-extra/trunk/clangd/Selection.cpp Wed Jul 31 10:52:40 2019
@@ -17,6 +17,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -28,39 +29,91 @@ namespace {
 using Node = SelectionTree::Node;
 using ast_type_traits::DynTypedNode;
 
-// Stores a collection of (possibly-overlapping) integer ranges.
-// When new ranges are added, hit-tests them against existing ones.
-class RangeSet {
+// Identifies which tokens are selected, and evaluates claims of source ranges
+// by AST nodes. Tokens may be claimed only once: first-come, first-served.
+class SelectedTokens {
 public:
-  // Returns true if any new offsets are covered.
-  // This is naive (linear in number of successful add() calls), but ok for now.
-  bool add(unsigned Begin, unsigned End) {
-    assert(std::is_sorted(Ranges.begin(), Ranges.end()));
-    assert(Begin < End);
-
-    if (covered(Begin, End))
-      return false;
-    auto Pair = std::make_pair(Begin, End);
-    Ranges.insert(llvm::upper_bound(Ranges, Pair), Pair);
-    return true;
+  SelectedTokens(llvm::ArrayRef<syntax::Token> Spelled, const SourceManager &SM,
+                 unsigned SelBegin, unsigned SelEnd)
+      : SelBegin(SelBegin), SelEnd(SelEnd) {
+    // Extract bounds and selected-ness for all tokens spelled in the file.
+    Tokens.reserve(Spelled.size());
+    for (const auto& Tok : Spelled) {
+      // As well as comments, don't count semicolons as real tokens.
+      // They're not properly claimed as expr-statement is missing from the AST.
+      if (Tok.kind() == tok::comment || Tok.kind() == tok::semi)
+        continue;
+
+      Tokens.emplace_back();
+      TokInfo &S = Tokens.back();
+      S.StartOffset = SM.getFileOffset(Tok.location());
+      S.EndOffset = S.StartOffset + Tok.length();
+      if (S.StartOffset >= SelBegin && S.EndOffset <= SelEnd)
+        S.Selected = SelectionTree::Complete;
+      else if (S.EndOffset > SelBegin && S.StartOffset < SelEnd)
+        S.Selected = SelectionTree::Partial;
+      else
+        S.Selected = SelectionTree::Unselected;
+      S.Claimed = false;
+    }
   }
 
-private:
-  bool covered(unsigned Begin, unsigned End) {
-    assert(Begin < End);
-    for (const auto &R : Ranges) {
-      if (Begin < R.first)
-        return false; // The prefix [Begin, R.first) is not covered.
-      if (Begin < R.second) {
-        Begin = R.second; // Prefix is covered, truncate the range.
-        if (Begin >= End)
-          return true;
+  // Associates any tokens overlapping [Begin, End) with an AST node.
+  // Tokens that were already claimed by another AST node are not claimed again.
+  // Returns whether the node is selected in the sense of SelectionTree.
+  SelectionTree::Selection claim(unsigned Begin, unsigned End) {
+    assert(Begin <= End);
+
+    // Fast-path for missing the selection entirely.
+    if (Begin >= SelEnd || End <= SelBegin)
+      return SelectionTree::Unselected;
+
+    // We will consider the range (at least partially) selected if it hit any
+    // selected and previously unclaimed token.
+    bool ClaimedAnyToken = false;
+    // The selection is (at most) partial if:
+    // - any claimed token is partially selected
+    // - any token in the range is unselected
+    bool PartialSelection = false;
+
+    // Find the first token that (maybe) overlaps the claimed range.
+    auto Start = llvm::partition_point(Tokens, [&](const TokInfo &Tok) {
+      return Tok.EndOffset <= Begin;
+    });
+    // Iterate over every token that overlaps the range.
+    // Claim selected tokens, and update the two result flags.
+    for (auto It = Start; It != Tokens.end() && It->StartOffset < End; ++It) {
+      if (It->Selected) {
+        if (!It->Claimed) {
+          // Token is selected, in the node's range, and unclaimed; claim it.
+          It->Claimed = true;
+          ClaimedAnyToken = true;
+          // If the token was only partially selected, so is the node.
+          PartialSelection |= (It->Selected == SelectionTree::Partial);
+        }
+      } else {
+        // If the node covers an unselected token, it's not completely selected.
+        PartialSelection = true;
       }
     }
-    return false;
+
+    if (!ClaimedAnyToken)
+      return SelectionTree::Unselected;
+    return PartialSelection ? SelectionTree::Partial : SelectionTree::Complete;
   }
 
-  std::vector<std::pair<unsigned, unsigned>> Ranges; // Always sorted.
+private:
+  struct TokInfo {
+    unsigned StartOffset;
+    unsigned EndOffset;
+    SelectionTree::Selection Selected;
+    bool Claimed;
+    bool operator<(const TokInfo &Other) const {
+      return StartOffset < Other.StartOffset;
+    }
+  };
+  std::vector<TokInfo> Tokens;
+  unsigned SelBegin, SelEnd;
 };
 
 // Show the type of a node for debugging.
@@ -99,14 +152,16 @@ class SelectionVisitor : public Recursiv
 public:
   // Runs the visitor to gather selected nodes and their ancestors.
   // If there is any selection, the root (TUDecl) is the first node.
-  static std::deque<Node> collect(ASTContext &AST, const PrintingPolicy &PP,
-                                  unsigned Begin, unsigned End, FileID File) {
-    SelectionVisitor V(AST, PP, Begin, End, File);
+  static std::deque<Node> collect(ASTContext &AST,
+                                  const syntax::TokenBuffer &Tokens,
+                                  const PrintingPolicy &PP, unsigned Begin,
+                                  unsigned End, FileID File) {
+    SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
     V.TraverseAST(AST);
     assert(V.Stack.size() == 1 && "Unpaired push/pop?");
     assert(V.Stack.top() == &V.Nodes.front());
-    // We selected TUDecl if characters were unclaimed (or the file is empty).
-    if (V.Nodes.size() == 1 || V.Claimed.add(Begin, End)) {
+    // We selected TUDecl if tokens were unclaimed (or the file is empty).
+    if (V.Nodes.size() == 1 || V.Claimed.claim(Begin, End)) {
       StringRef FileContent = AST.getSourceManager().getBufferData(File);
       // Don't require the trailing newlines to be selected.
       bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size();
@@ -176,15 +231,18 @@ public:
 private:
   using Base = RecursiveASTVisitor<SelectionVisitor>;
 
-  SelectionVisitor(ASTContext &AST, const PrintingPolicy &PP, unsigned SelBegin,
-                   unsigned SelEnd, FileID SelFile)
+  SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
+                   const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
+                   FileID SelFile)
       : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
 #ifndef NDEBUG
         PrintPolicy(PP),
 #endif
-        SelBegin(SelBegin), SelEnd(SelEnd), SelFile(SelFile),
+        Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd),
+        SelFile(SelFile),
         SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken(
-            SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))) {
+            SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))),
+        SelEnd(SelEnd) {
     // Ensure we have a node for the TU decl, regardless of traversal scope.
     Nodes.emplace_back();
     Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
@@ -318,30 +376,16 @@ private:
     // Otherwise, nodes in macro expansions can't be selected.
     if (B.first != SelFile || E.first != SelFile)
       return SelectionTree::Unselected;
-    // Is there any overlap at all between the selection and range?
-    if (B.second >= SelEnd || E.second < SelBegin)
-      return SelectionTree::Unselected;
-    // We may have hit something.
-    auto PreciseBounds = std::make_pair(B.second, E.second);
-    // Trim range using the selection, drop it if empty.
-    B.second = std::max(B.second, SelBegin);
-    E.second = std::min(E.second, SelEnd);
-    if (B.second >= E.second)
-      return SelectionTree::Unselected;
     // Attempt to claim the remaining range. If there's nothing to claim, only
     // children were selected.
-    if (!Claimed.add(B.second, E.second))
-      return SelectionTree::Unselected;
-    dlog("{1}hit selection: {0}",
-         SourceRange(SM.getComposedLoc(B.first, B.second),
-                     SM.getComposedLoc(E.first, E.second))
-             .printToString(SM),
-         indent());
-    // Some of our own characters are covered, this is a true hit.
-    // Determine whether the node was completely covered.
-    return (PreciseBounds.first >= SelBegin && PreciseBounds.second <= SelEnd)
-               ? SelectionTree::Complete
-               : SelectionTree::Partial;
+    SelectionTree::Selection Result = Claimed.claim(B.second, E.second);
+    if (Result)
+      dlog("{1}hit selection: {0}",
+           SourceRange(SM.getComposedLoc(B.first, B.second),
+                       SM.getComposedLoc(E.first, E.second))
+               .printToString(SM),
+           indent());
+    return Result;
   }
 
   std::string indent(int Offset = 0) {
@@ -357,11 +401,8 @@ private:
   const PrintingPolicy &PrintPolicy;
 #endif
   std::stack<Node *> Stack;
-  RangeSet Claimed;
+  SelectedTokens Claimed;
   std::deque<Node> Nodes; // Stable pointers as we add more nodes.
-  // Half-open selection range.
-  unsigned SelBegin;
-  unsigned SelEnd;
   FileID SelFile;
   // If the selection start slices a token in half, the beginning of that token.
   // This is useful for checking whether the end of a token range overlaps
@@ -369,6 +410,7 @@ private:
   // range.end + measureToken(range.end) < SelBegin (assuming range.end points
   // to a token), and it saves a lex every time.
   unsigned SelBeginTokenStart;
+  unsigned SelEnd;
 };
 
 } // namespace
@@ -414,7 +456,8 @@ static std::pair<unsigned, unsigned> poi
   return {Offset, Offset + 1};
 }
 
-SelectionTree::SelectionTree(ASTContext &AST, unsigned Begin, unsigned End)
+SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
+                             unsigned Begin, unsigned End)
     : PrintPolicy(AST.getLangOpts()) {
   // No fundamental reason the selection needs to be in the main file,
   // but that's all clangd has needed so far.
@@ -428,13 +471,14 @@ SelectionTree::SelectionTree(ASTContext
   dlog("Computing selection for {0}",
        SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
            .printToString(SM));
-  Nodes = SelectionVisitor::collect(AST, PrintPolicy, Begin, End, FID);
+  Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
   Root = Nodes.empty() ? nullptr : &Nodes.front();
   dlog("Built selection tree\n{0}", *this);
 }
 
-SelectionTree::SelectionTree(ASTContext &AST, unsigned Offset)
-    : SelectionTree(AST, Offset, Offset) {}
+SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
+                             unsigned Offset)
+    : SelectionTree(AST, Tokens, Offset, Offset) {}
 
 const Node *SelectionTree::commonAncestor() const {
   const Node *Ancestor = Root;

Modified: clang-tools-extra/trunk/clangd/Selection.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Selection.h?rev=367453&r1=367452&r2=367453&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Selection.h (original)
+++ clang-tools-extra/trunk/clangd/Selection.h Wed Jul 31 10:52:40 2019
@@ -35,6 +35,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SELECTION_H
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/PrettyPrinter.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace clang {
@@ -56,8 +57,9 @@ class ParsedAST;
 //
 // SelectionTree tries to behave sensibly in the presence of macros, but does
 // not model any preprocessor concepts: the output is a subset of the AST.
-// Currently comments, directives etc are treated as part of the lexically
-// containing AST node, (though we may want to change this in future).
+//
+// Comments, directives and whitespace are completely ignored.
+// Semicolons are also ignored, as the AST generally does not model them well.
 //
 // The SelectionTree owns the Node structures, but the ASTNode attributes
 // point back into the AST it was constructed with.
@@ -66,11 +68,13 @@ public:
   // Creates a selection tree at the given byte offset in the main file.
   // This is approximately equivalent to a range of one character.
   // (Usually, the character to the right of Offset, sometimes to the left).
-  SelectionTree(ASTContext &AST, unsigned Offset);
+  SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
+                unsigned Offset);
   // Creates a selection tree for the given range in the main file.
   // The range includes bytes [Start, End).
   // If Start == End, uses the same heuristics as SelectionTree(AST, Start).
-  SelectionTree(ASTContext &AST, unsigned Start, unsigned End);
+  SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
+                unsigned Start, unsigned End);
 
   // Describes to what extent an AST node is covered by the selection.
   enum Selection {

Modified: clang-tools-extra/trunk/clangd/refactor/Tweak.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/refactor/Tweak.cpp?rev=367453&r1=367452&r2=367453&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/refactor/Tweak.cpp (original)
+++ clang-tools-extra/trunk/clangd/refactor/Tweak.cpp Wed Jul 31 10:52:40 2019
@@ -41,7 +41,7 @@ void validateRegistry() {
 Tweak::Selection::Selection(ParsedAST &AST, unsigned RangeBegin,
                             unsigned RangeEnd)
     : AST(AST), SelectionBegin(RangeBegin), SelectionEnd(RangeEnd),
-      ASTSelection(AST.getASTContext(), RangeBegin, RangeEnd) {
+      ASTSelection(AST.getASTContext(), AST.getTokens(), RangeBegin, RangeEnd) {
   auto &SM = AST.getSourceManager();
   Code = SM.getBufferData(SM.getMainFileID());
   Cursor = SM.getComposedLoc(SM.getMainFileID(), RangeBegin);

Modified: clang-tools-extra/trunk/clangd/unittests/SelectionTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/SelectionTests.cpp?rev=367453&r1=367452&r2=367453&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/unittests/SelectionTests.cpp (original)
+++ clang-tools-extra/trunk/clangd/unittests/SelectionTests.cpp Wed Jul 31 10:52:40 2019
@@ -23,16 +23,16 @@ SelectionTree makeSelectionTree(const St
   Annotations Test(MarkedCode);
   switch (Test.points().size()) {
   case 1: // Point selection.
-    return SelectionTree(AST.getASTContext(),
+    return SelectionTree(AST.getASTContext(), AST.getTokens(),
                          cantFail(positionToOffset(Test.code(), Test.point())));
   case 2: // Range selection.
     return SelectionTree(
-        AST.getASTContext(),
+        AST.getASTContext(), AST.getTokens(),
         cantFail(positionToOffset(Test.code(), Test.points()[0])),
         cantFail(positionToOffset(Test.code(), Test.points()[1])));
   default:
     ADD_FAILURE() << "Expected 1-2 points for selection.\n" << MarkedCode;
-    return SelectionTree(AST.getASTContext(), 0u, 0u);
+    return SelectionTree(AST.getASTContext(), AST.getTokens(), 0u, 0u);
   }
 }
 
@@ -219,6 +219,9 @@ TEST(SelectionTest, CommonAncestor) {
       {"void foo() { [[foo^]] (); }", "DeclRefExpr"},
       {"int bar; void foo() [[{ foo (); }]]^", "CompoundStmt"},
 
+      // Ignores whitespace, comments, and semicolons in the selection.
+      {"void foo() { [[foo^()]]; /*comment*/^}", "CallExpr"},
+
       // Tricky case: FunctionTypeLoc in FunctionDecl has a hole in it.
       {"[[^void]] foo();", "BuiltinTypeLoc"},
       {"[[void foo^()]];", "FunctionProtoTypeLoc"},




More information about the cfe-commits mailing list