[clang-tools-extra] 70914aa - Use pseudo parser for folding ranges

Mon Jul 18 02:35:43 PDT 2022

Author: Utkarsh Saxena
Date: 2022-07-18T11:35:34+02:00
New Revision: 70914aa631561aa9a29681bfe5159b4ea6952060

URL: https://github.com/llvm/llvm-project/commit/70914aa631561aa9a29681bfe5159b4ea6952060
DIFF: https://github.com/llvm/llvm-project/commit/70914aa631561aa9a29681bfe5159b4ea6952060.diff

LOG: Use pseudo parser for folding ranges

This first version only uses bracket matching. We plan to extend this to
use DirectiveTree as well.

Also includes changes to Token to allow retrieving the corresponding token
in the token stream of the original source file.

Differential Revision: https://reviews.llvm.org/D129648

Added: 
    

Modified: 
    clang-tools-extra/clangd/CMakeLists.txt
    clang-tools-extra/clangd/SemanticSelection.cpp
    clang-tools-extra/clangd/SemanticSelection.h
    clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
    clang-tools-extra/pseudo/include/clang-pseudo/Token.h
    clang-tools-extra/pseudo/lib/CMakeLists.txt
    clang-tools-extra/pseudo/lib/Lex.cpp
    clang-tools-extra/pseudo/unittests/TokenTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
index 7cfbd6f95750e..de8f087a52a5e 100644

--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -170,6 +170,8 @@ target_link_libraries(clangDaemon
   clangTidy
 
   clangdSupport
+
+  clangPseudo
   )
 if(CLANGD_TIDY_CHECKS)
   target_link_libraries(clangDaemon PRIVATE ${ALL_CLANG_TIDY_CHECKS})

diff  --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp
index f118f3ec04b08..affac26fec912 100644
--- a/clang-tools-extra/clangd/SemanticSelection.cpp
+++ b/clang-tools-extra/clangd/SemanticSelection.cpp
@@ -11,6 +11,9 @@
 #include "Protocol.h"
 #include "Selection.h"
 #include "SourceCode.h"
+#include "clang-pseudo/Bracket.h"
+#include "clang-pseudo/DirectiveTree.h"
+#include "clang-pseudo/Token.h"
 #include "clang/AST/DeclBase.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
@@ -170,5 +173,46 @@ llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST) {
   return collectFoldingRanges(SyntaxTree, TM);
 }
 
+// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and
+// other code regions (e.g. public/private/protected sections of classes,
+// control flow statement bodies).
+// Related issue: https://github.com/clangd/clangd/issues/310
+llvm::Expected<std::vector<FoldingRange>>
+getFoldingRanges(const std::string &Code) {
+  auto OrigStream = clang::pseudo::lex(Code, clang::pseudo::genericLangOpts());
+
+  auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(OrigStream);
+  clang::pseudo::chooseConditionalBranches(DirectiveStructure, OrigStream);
+
+  // FIXME: Provide ranges in the disabled-PP regions as well.
+  auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream);
+
+  auto ParseableStream = cook(Preprocessed, clang::pseudo::genericLangOpts());
+  pseudo::pairBrackets(ParseableStream);
+
+  std::vector<FoldingRange> Result;
+  for (const auto &Tok : ParseableStream.tokens()) {
+    if (auto *Paired = Tok.pair()) {
+      // Only an opening bracket is on an earlier line than its pair, so each
+      // range is emitted once and single-line bracket pairs are skipped.
+      if (Tok.Line < Paired->Line) {
+        Position Start = offsetToPosition(
+            Code,
+            OrigStream.tokens()[Tok.OriginalIndex].text().data() - Code.data());
+        Position End = offsetToPosition(
+            Code, OrigStream.tokens()[Paired->OriginalIndex].text().data() -
+                      Code.data());
+        FoldingRange FR;
+        FR.startLine = Start.line;
+        FR.startCharacter = Start.character + 1; // Skip the opening bracket.
+        FR.endLine = End.line;
+        FR.endCharacter = End.character;
+        Result.push_back(FR);
+      }
+    }
+  }
+  return Result;
+}
+
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/SemanticSelection.h b/clang-tools-extra/clangd/SemanticSelection.h
index 2fe37871ec680..337d8d38a0e0d 100644
--- a/clang-tools-extra/clangd/SemanticSelection.h
+++ b/clang-tools-extra/clangd/SemanticSelection.h
@@ -15,6 +15,7 @@
 #include "ParsedAST.h"
 #include "Protocol.h"
 #include "llvm/Support/Error.h"
+#include <string>
 #include <vector>
 namespace clang {
 namespace clangd {
@@ -29,6 +30,11 @@ llvm::Expected<SelectionRange> getSemanticRanges(ParsedAST &AST, Position Pos);
 /// This should include large scopes, preprocessor blocks etc.
 llvm::Expected<std::vector<FoldingRange>> getFoldingRanges(ParsedAST &AST);
 
+/// Returns a list of ranges whose contents might be collapsible in an editor.
+/// This version uses the pseudoparser which does not require the AST.
+llvm::Expected<std::vector<FoldingRange>>
+getFoldingRanges(const std::string &Code);
+
 } // namespace clangd
 } // namespace clang
 

diff  --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
index 397494e80f9e8..a41553097fdfc 100644
--- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
@@ -265,6 +265,86 @@ TEST(FoldingRanges, All) {
   }
 }
 
+TEST(FoldingRangesPseudoParser, All) {
+  const char *Tests[] = {
+      R"cpp(
+        #define FOO int foo() {\
+          int Variable = 42; \
+        }
+
+        // Do not generate folding range for braces within macro expansion.
+        FOO
+
+        // Do not generate folding range within macro arguments.
+        #define FUNCTOR(functor) functor
+        void func() {[[
+          FUNCTOR([](){});
+        ]]}
+
+        // Do not generate folding range with a brace coming from macro.
+        #define LBRACE {
+        void bar() LBRACE
+          int X = 42;
+        }
+      )cpp",
+      R"cpp(
+        void func() {[[
+          int Variable = 100;
+
+          if (Variable > 5) {[[
+            Variable += 42;
+          ]]} else if (Variable++)
+            ++Variable;
+          else {[[
+            Variable--;
+          ]]}
+
+          // Do not generate FoldingRange for empty CompoundStmts.
+          for (;;) {}
+
+          // If there are newlines between {}, we should generate one.
+          for (;;) {[[
+
+          ]]}
+        ]]}
+      )cpp",
+      R"cpp(
+        class Foo {[[
+        public:
+          Foo() {[[
+            int X = 1;
+          ]]}
+
+        private:
+          int getBar() {[[
+            return 42;
+          ]]}
+
+          // Braces are located at the same line: no folding range here.
+          void getFooBar() { }
+        ]]};
+      )cpp",
+      R"cpp(
+        // Range boundaries on escaped newlines.
+        class Foo \
+        \
+        {[[  \
+        public:
+          Foo() {[[\
+            int X = 1;
+          ]]}   \
+        ]]};
+      )cpp",
+  };
+  for (const char *Test : Tests) {
+    auto T = Annotations(Test); // [[...]] marks the expected ranges.
+    EXPECT_THAT(
+        gatherFoldingRanges(llvm::cantFail(getFoldingRanges(T.code().str()))),
+        UnorderedElementsAreArray(T.ranges()))
+        << Test;
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
index 36e5221a0d30c..e4a8659f739cf 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h
@@ -67,6 +67,8 @@ struct Token {
   uint8_t Indent = 0;
   /// Flags have some meaning defined by the function that produced this stream.
   uint8_t Flags = 0;
+  /// Index into the original token stream (as raw-lexed from the source code).
+  Index OriginalIndex = Invalid;
   // Helpers to get/set Flags based on `enum class`.
   template <class T> bool flag(T Mask) const {
     return Flags & uint8_t{static_cast<std::underlying_type_t<T>>(Mask)};
@@ -96,7 +98,7 @@ struct Token {
   /// If this token is a paired bracket, the offset of the pair in the stream.
   int32_t Pair = 0;
 };
-static_assert(sizeof(Token) <= sizeof(char *) + 20, "Careful with layout!");
+static_assert(sizeof(Token) <= sizeof(char *) + 24, "Careful with layout!");
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Token &);
 
 /// A half-open range of tokens within a stream.

diff  --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt
index efcf9267e7173..d517eef35503b 100644
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -17,3 +17,7 @@ add_clang_library(clangPseudo
   clangLex
   clangPseudoGrammar
   )
+
+  target_include_directories(clangPseudo INTERFACE # For in-tree dependents.
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
+  )

diff  --git a/clang-tools-extra/pseudo/lib/Lex.cpp b/clang-tools-extra/pseudo/lib/Lex.cpp
index c96e2f27cba95..4b89ad017ef1f 100644
--- a/clang-tools-extra/pseudo/lib/Lex.cpp
+++ b/clang-tools-extra/pseudo/lib/Lex.cpp
@@ -26,6 +26,8 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) {
 
   TokenStream Result;
   clang::Token CT;
+  // Index into the token stream of original source code.
+  Token::Index TokenIndex = 0;
   unsigned LastOffset = 0;
   unsigned Line = 0;
   unsigned Indent = 0;
@@ -66,6 +68,7 @@ TokenStream lex(const std::string &Code, const clang::LangOptions &LangOpts) {
     if (CT.needsCleaning() || CT.hasUCN())
       Tok.setFlag(LexFlags::NeedsCleaning);
 
+    Tok.OriginalIndex = TokenIndex++;
     Result.push(Tok);
     LastOffset = Offset;
   }

diff  --git a/clang-tools-extra/pseudo/unittests/TokenTest.cpp b/clang-tools-extra/pseudo/unittests/TokenTest.cpp
index 8280a9b29341e..5b71accfad50f 100644
--- a/clang-tools-extra/pseudo/unittests/TokenTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/TokenTest.cpp
@@ -31,6 +31,10 @@ MATCHER_P2(lineIndent, Line, Indent, "") {
   return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent;
 }
 
+MATCHER_P(originalIndex, index, "") { // Matches a Token by OriginalIndex.
+  return arg.OriginalIndex == (Token::Index)index;
+}
+
 TEST(TokenTest, Lex) {
   LangOptions Opts;
   std::string Code = R"cpp(
@@ -105,20 +109,23 @@ tokens
       Raw.tokens(),
       ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier),
                         hasFlag(LexFlags::StartsPPLine),
-                        hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0)),
+                        hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0),
+                        originalIndex(0)),
                   AllOf(token("two", tok::raw_identifier),
                         hasFlag(LexFlags::StartsPPLine),
-                        Not(hasFlag(LexFlags::NeedsCleaning))),
+                        Not(hasFlag(LexFlags::NeedsCleaning)),
+                        originalIndex(1)),
                   AllOf(token("\\\ntokens", tok::raw_identifier),
                         Not(hasFlag(LexFlags::StartsPPLine)),
-                        hasFlag(LexFlags::NeedsCleaning))));
+                        hasFlag(LexFlags::NeedsCleaning), originalIndex(2))));
 
   TokenStream Cooked = cook(Raw, Opts);
   EXPECT_THAT(
       Cooked.tokens(),
-      ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0)),
-                  token("two", tok::identifier),
-                  token("tokens", tok::identifier)));
+      ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0),
+                        originalIndex(0)),
+                  AllOf(token("two", tok::identifier), originalIndex(1)),
+                  AllOf(token("tokens", tok::identifier), originalIndex(2))));
 }
 
 TEST(TokenTest, EncodedCharacters) {
@@ -182,13 +189,14 @@ TEST(TokenTest, SplitGreaterGreater) {
 )cpp";
   TokenStream Cook = cook(lex(Code, Opts), Opts);
   TokenStream Split = stripComments(Cook);
-  EXPECT_THAT(Split.tokens(), ElementsAreArray({
-                                  token(">", tok::greater),
-                                  token(">", tok::greater),
-                                  token(">", tok::greater),
-                                  token(">", tok::greater),
-                                  token(">>=", tok::greatergreaterequal),
-                              }));
+  EXPECT_THAT(Split.tokens(),
+              ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)),
+                          AllOf(token(">", tok::greater), originalIndex(0)),
+                          // Token 1 and 2 are comments.
+                          AllOf(token(">", tok::greater), originalIndex(3)),
+                          AllOf(token(">", tok::greater), originalIndex(3)),
+                          AllOf(token(">>=", tok::greatergreaterequal),
+                                originalIndex(4))));
 }
 
 TEST(TokenTest, DropComments) {
@@ -199,13 +207,16 @@ TEST(TokenTest, DropComments) {
 )cpp";
   TokenStream Raw = cook(lex(Code, Opts), Opts);
   TokenStream Stripped = stripComments(Raw);
-  EXPECT_THAT(Raw.tokens(),
-              ElementsAreArray(
-                  {token("// comment", tok::comment), token("int", tok::kw_int),
-                   token("/*abc*/", tok::comment), token(";", tok::semi)}));
-
-  EXPECT_THAT(Stripped.tokens(), ElementsAreArray({token("int", tok::kw_int),
-                                                   token(";", tok::semi)}));
+  EXPECT_THAT(
+      Raw.tokens(),
+      ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)),
+                  AllOf(token("int", tok::kw_int), originalIndex(1)),
+                  AllOf(token("/*abc*/", tok::comment), originalIndex(2)),
+                  AllOf(token(";", tok::semi), originalIndex(3))));
+
+  EXPECT_THAT(Stripped.tokens(),
+              ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)),
+                          AllOf(token(";", tok::semi), originalIndex(3))));
 }
 
 } // namespace