[clang] e702bdb - [Syntax] Build SimpleDeclaration node that groups multiple declarators

Ilya Biryukov via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 29 02:40:14 PST 2019


Author: Ilya Biryukov
Date: 2019-11-29T11:39:45+01:00
New Revision: e702bdb8598fcb4224f465569e7692a155c3eb3e

URL: https://github.com/llvm/llvm-project/commit/e702bdb8598fcb4224f465569e7692a155c3eb3e
DIFF: https://github.com/llvm/llvm-project/commit/e702bdb8598fcb4224f465569e7692a155c3eb3e.diff

LOG: [Syntax] Build SimpleDeclaration node that groups multiple declarators

Summary:
Also remove the temporary TopLevelDeclaration node and add
UnknownDeclaration to represent other unknown nodes.

See the follow-up change for building more top-level declarations.
Adding declarators is also pretty involved and will be done in another
follow-up patch.

Reviewers: gribozavr2

Reviewed By: gribozavr2

Subscribers: merge_guards_bot, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70787

Added: 
    

Modified: 
    clang/include/clang/Tooling/Syntax/Nodes.h
    clang/lib/Tooling/Syntax/BuildTree.cpp
    clang/lib/Tooling/Syntax/Nodes.cpp
    clang/unittests/Tooling/Syntax/TreeTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
index c40b6bd24817..c4db4da892c2 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -37,7 +37,6 @@ namespace syntax {
 enum class NodeKind : uint16_t {
   Leaf,
   TranslationUnit,
-  TopLevelDeclaration,
 
   // Expressions
   UnknownExpression,
@@ -57,7 +56,11 @@ enum class NodeKind : uint16_t {
   ReturnStatement,
   RangeBasedForStatement,
   ExpressionStatement,
-  CompoundStatement
+  CompoundStatement,
+
+  // Declarations
+  UnknownDeclaration,
+  SimpleDeclaration,
 };
 /// For debugging purposes.
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K);
@@ -102,20 +105,6 @@ class TranslationUnit final : public Tree {
   }
 };
 
-/// FIXME: this node is temporary and will be replaced with nodes for various
-///        'declarations' and 'declarators' from the C/C++ grammar
-///
-/// Represents any top-level declaration. Only there to give the syntax tree a
-/// bit of structure until we implement syntax nodes for declarations and
-/// declarators.
-class TopLevelDeclaration final : public Tree {
-public:
-  TopLevelDeclaration() : Tree(NodeKind::TopLevelDeclaration) {}
-  static bool classof(const Node *N) {
-    return N->kind() == NodeKind::TopLevelDeclaration;
-  }
-};
-
 /// A base class for all expressions. Note that expressions are not statements,
 /// even though they are in clang.
 class Expression : public Tree {
@@ -313,6 +302,38 @@ class CompoundStatement final : public Statement {
   syntax::Leaf *rbrace();
 };
 
+/// A declaration that can appear at the top-level. Note that this does *not*
+/// correspond 1-to-1 to clang::Decl. Syntax trees distinguish between top-level
+/// declarations (e.g. namespace definitions) and declarators (e.g. variables,
+/// typedefs, etc.). Declarators are stored inside SimpleDeclaration.
+class Declaration : public Tree {
+public:
+  Declaration(NodeKind K) : Tree(K) {}
+  static bool classof(const Node *N) {
+    return NodeKind::UnknownDeclaration <= N->kind() &&
+           N->kind() <= NodeKind::SimpleDeclaration;
+  }
+};
+
+/// Declaration of an unknown kind, e.g. not yet supported in syntax trees.
+class UnknownDeclaration final : public Declaration {
+public:
+  UnknownDeclaration() : Declaration(NodeKind::UnknownDeclaration) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::UnknownDeclaration;
+  }
+};
+
+/// Groups multiple declarators (e.g. variables, typedefs, etc.) together. All
+/// grouped declarators share the same declaration specifiers (e.g. 'int' or
+/// 'typedef').
+class SimpleDeclaration final : public Declaration {
+public:
+  SimpleDeclaration() : Declaration(NodeKind::SimpleDeclaration) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::SimpleDeclaration;
+  }
+};
 } // namespace syntax
 } // namespace clang
 #endif

diff  --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index 22cdb89b7bfb..67081497d04c 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Tooling/Syntax/BuildTree.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Basic/LLVM.h"
@@ -56,6 +58,14 @@ class syntax::TreeBuilder {
   /// Range.
   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
 
+  /// Must be called with the range of each `DeclaratorDecl`. Ensures the
+  /// corresponding declarator nodes are covered by `SimpleDeclaration`.
+  void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range);
+
+  /// Notifies that we should not consume trailing semicolon when computing
+  /// token range of \p D.
+  void noticeDeclaratorWithoutSemicolon(Decl *D);
+
   /// Mark the \p Child node with a corresponding \p Role. All marked children
   /// should be consumed by foldNode.
   /// (!) when called on expressions (clang::Expr is derived from clang::Stmt),
@@ -94,7 +104,14 @@ class syntax::TreeBuilder {
     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
   }
   llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
-    return getRange(D->getBeginLoc(), D->getEndLoc());
+    auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc());
+    if (llvm::isa<NamespaceDecl>(D))
+      return Tokens;
+    if (DeclsWithoutSemicolons.count(D))
+      return Tokens;
+    // FIXME: do not consume trailing semicolon on function definitions.
+    // Most declarations own a semicolon in syntax trees, but not in clang AST.
+    return withTrailingSemicolon(Tokens);
   }
   llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
     return getRange(E->getBeginLoc(), E->getEndLoc());
@@ -108,14 +125,22 @@ class syntax::TreeBuilder {
 
     // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
     // all statements that end with those. Consume this semicolon here.
-    //
-    // (!) statements never consume 'eof', so looking at the next token is ok.
+    if (Tokens.back().kind() == tok::semi)
+      return Tokens;
+    return withTrailingSemicolon(Tokens);
+  }
+
+private:
+  llvm::ArrayRef<syntax::Token>
+  withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const {
+    assert(!Tokens.empty());
+    assert(Tokens.back().kind() != tok::eof);
+    // (!) we never consume 'eof', so looking at the next token is ok.
     if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
       return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
     return Tokens;
   }
 
-private:
   /// Finds a token starting at \p L. The token must exist.
   const syntax::Token *findToken(SourceLocation L) const;
 
@@ -136,6 +161,8 @@ class syntax::TreeBuilder {
                      {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
     }
 
+    ~Forest() { assert(DelayedFolds.empty()); }
+
     void assignRole(llvm::ArrayRef<syntax::Token> Range,
                     syntax::NodeRole Role) {
       assert(!Range.empty());
@@ -148,30 +175,46 @@ class syntax::TreeBuilder {
       It->second.Role = Role;
     }
 
-    /// Add \p Node to the forest and fill its children nodes based on the \p
-    /// NodeRange.
-    void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
+    /// Add \p Node to the forest and attach child nodes based on \p Tokens.
+    void foldChildren(llvm::ArrayRef<syntax::Token> Tokens,
                       syntax::Tree *Node) {
-      assert(!NodeTokens.empty());
-      assert(Node->firstChild() == nullptr && "node already has children");
-
-      auto *FirstToken = NodeTokens.begin();
-      auto BeginChildren = Trees.lower_bound(FirstToken);
-      assert(BeginChildren != Trees.end() &&
-             BeginChildren->first == FirstToken &&
-             "fold crosses boundaries of existing subtrees");
-      auto EndChildren = Trees.lower_bound(NodeTokens.end());
-      assert((EndChildren == Trees.end() ||
-              EndChildren->first == NodeTokens.end()) &&
-             "fold crosses boundaries of existing subtrees");
+      // Execute delayed folds inside `Tokens`.
+      auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin());
+      auto It = BeginExecuted;
+      for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It)
+        foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End),
+                          It->second.Node);
+      DelayedFolds.erase(BeginExecuted, It);
+
+      // Attach children to `Node`.
+      foldChildrenEager(Tokens, Node);
+    }
 
-      // (!) we need to go in reverse order, because we can only prepend.
-      for (auto It = EndChildren; It != BeginChildren; --It)
-        Node->prependChildLowLevel(std::prev(It)->second.Node,
-                                   std::prev(It)->second.Role);
+    /// Schedule a call to `foldChildren` that will only be executed when
+    /// containing node is folded. The range of delayed nodes can be extended by
+    /// calling `extendDelayedFold`. Only one delayed node for each starting
+    /// token is allowed.
+    void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens,
+                             syntax::Tree *Node) {
+      assert(!Tokens.empty());
+      bool Inserted =
+          DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}})
+              .second;
+      (void)Inserted;
+      assert(Inserted && "Multiple delayed folds start at the same token");
+    }
 
-      Trees.erase(BeginChildren, EndChildren);
-      Trees.insert({FirstToken, NodeAndRole(Node)});
+    /// If there is a delayed fold starting at `ExtendedRange.begin()`, extends
+    /// its endpoint to `ExtendedRange.end()` and returns true.
+    /// Otherwise, returns false.
+    bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) {
+      assert(!ExtendedRange.empty());
+      auto It = DelayedFolds.find(ExtendedRange.data());
+      if (It == DelayedFolds.end())
+        return false;
+      assert(It->second.End <= ExtendedRange.end());
+      It->second.End = ExtendedRange.end();
+      return true;
     }
 
     // EXPECTS: all tokens were consumed and are owned by a single root node.
@@ -199,6 +242,30 @@ class syntax::TreeBuilder {
     }
 
   private:
+    /// Implementation detail of `foldChildren`, does actual folding ignoring
+    /// delayed folds.
+    void foldChildrenEager(llvm::ArrayRef<syntax::Token> Tokens,
+                           syntax::Tree *Node) {
+      assert(Node->firstChild() == nullptr && "node already has children");
+
+      auto *FirstToken = Tokens.begin();
+      auto BeginChildren = Trees.lower_bound(FirstToken);
+      assert((BeginChildren == Trees.end() ||
+              BeginChildren->first == FirstToken) &&
+             "fold crosses boundaries of existing subtrees");
+      auto EndChildren = Trees.lower_bound(Tokens.end());
+      assert(
+          (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
+          "fold crosses boundaries of existing subtrees");
+
+      // (!) we need to go in reverse order, because we can only prepend.
+      for (auto It = EndChildren; It != BeginChildren; --It)
+        Node->prependChildLowLevel(std::prev(It)->second.Node,
+                                   std::prev(It)->second.Role);
+
+      Trees.erase(BeginChildren, EndChildren);
+      Trees.insert({FirstToken, NodeAndRole(Node)});
+    }
     /// A with a role that should be assigned to it when adding to a parent.
     struct NodeAndRole {
       explicit NodeAndRole(syntax::Node *Node)
@@ -214,6 +281,13 @@ class syntax::TreeBuilder {
     /// FIXME: storing the end tokens is redundant.
     /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
     std::map<const syntax::Token *, NodeAndRole> Trees;
+
+    /// See documentation of `foldChildrenDelayed` for details.
+    struct DelayedFold {
+      const syntax::Token *End = nullptr;
+      syntax::Tree *Node = nullptr;
+    };
+    std::map<const syntax::Token *, DelayedFold> DelayedFolds;
   };
 
   /// For debugging purposes.
@@ -221,6 +295,7 @@ class syntax::TreeBuilder {
 
   syntax::Arena &Arena;
   Forest Pending;
+  llvm::DenseSet<Decl*> DeclsWithoutSemicolons;
 };
 
 namespace {
@@ -231,20 +306,30 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
 
   bool shouldTraversePostOrder() const { return true; }
 
-  bool TraverseDecl(Decl *D) {
-    if (!D || isa<TranslationUnitDecl>(D))
-      return RecursiveASTVisitor::TraverseDecl(D);
-    if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
-      return true; // Only build top-level decls for now, do not recurse.
-    return RecursiveASTVisitor::TraverseDecl(D);
+  bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) {
+    // Ensure declarators are covered by SimpleDeclaration.
+    Builder.noticeDeclaratorRange(Builder.getRange(D));
+    // FIXME: build nodes for the declarator too.
+    return true;
+  }
+  bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) {
+    // Also a declarator.
+    Builder.noticeDeclaratorRange(Builder.getRange(D));
+    // FIXME: build nodes for the declarator too.
+    return true;
   }
 
   bool VisitDecl(Decl *D) {
-    assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
-           "expected a top-level decl");
     assert(!D->isImplicit());
     Builder.foldNode(Builder.getRange(D),
-                     new (allocator()) syntax::TopLevelDeclaration());
+                     new (allocator()) syntax::UnknownDeclaration());
+    return true;
+  }
+
+  bool WalkUpFromTagDecl(TagDecl *C) {
+    // Avoid building UnknownDeclaration here, syntactically 'struct X {}' and
+    // similar are part of declaration specifiers and do not introduce a new
+    // top-level declaration.
     return true;
   }
 
@@ -291,7 +376,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
   }
 
   bool TraverseStmt(Stmt *S) {
-    if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
+    if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
+      // We want to consume the semicolon, make sure SimpleDeclaration does not.
+      for (auto *D : DS->decls())
+        Builder.noticeDeclaratorWithoutSemicolon(D);
+    } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
       // (!) do not recurse into subexpressions.
       // we do not have syntax trees for expressions yet, so we only want to see
       // the first top-level expression.
@@ -429,6 +518,18 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
   Pending.foldChildren(Range, New);
 }
 
+void syntax::TreeBuilder::noticeDeclaratorRange(
+    llvm::ArrayRef<syntax::Token> Range) {
+  if (Pending.extendDelayedFold(Range))
+    return;
+  Pending.foldChildrenDelayed(Range,
+                              new (allocator()) syntax::SimpleDeclaration);
+}
+
+void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) {
+  DeclsWithoutSemicolons.insert(D);
+}
+
 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {
   if (Loc.isInvalid())
     return;

diff  --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 776330ab585f..b2ed4ffa22c2 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -16,8 +16,6 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
     return OS << "Leaf";
   case NodeKind::TranslationUnit:
     return OS << "TranslationUnit";
-  case NodeKind::TopLevelDeclaration:
-    return OS << "TopLevelDeclaration";
   case NodeKind::UnknownExpression:
     return OS << "UnknownExpression";
   case NodeKind::UnknownStatement:
@@ -50,6 +48,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
     return OS << "ExpressionStatement";
   case NodeKind::CompoundStatement:
     return OS << "CompoundStatement";
+  case NodeKind::UnknownDeclaration:
+    return OS << "UnknownDeclaration";
+  case NodeKind::SimpleDeclaration:
+    return OS << "SimpleDeclaration";
   }
   llvm_unreachable("unknown node kind");
 }

diff  --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index c8be48b1361d..3d30a074ddd8 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -130,7 +130,7 @@ void foo() {}
     )cpp",
           R"txt(
 *: TranslationUnit
-|-TopLevelDeclaration
+|-SimpleDeclaration
 | |-int
 | |-main
 | |-(
@@ -138,7 +138,7 @@ void foo() {}
 | `-CompoundStatement
 |   |-{
 |   `-}
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-foo
   |-(
@@ -157,7 +157,7 @@ int main() {
         )cpp",
           R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-int
   |-main
   |-(
@@ -202,7 +202,7 @@ void test() {
 )cpp",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -224,7 +224,7 @@ void test() {
       {"void test() { int a = 10; }",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -232,16 +232,18 @@ void test() {
   `-CompoundStatement
     |-{
     |-DeclarationStatement
-    | |-int
-    | |-a
-    | |-=
-    | |-10
+    | |-SimpleDeclaration
+    | | |-int
+    | | |-a
+    | | |-=
+    | | `-UnknownExpression
+    | |   `-10
     | `-;
     `-}
 )txt"},
       {"void test() { ; }", R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -263,7 +265,7 @@ void test() {
 )cpp",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -299,7 +301,7 @@ void test() {
 )cpp",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -329,7 +331,7 @@ int test() { return 1; }
       )cpp",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-int
   |-test
   |-(
@@ -352,7 +354,7 @@ void test() {
       )cpp",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -360,18 +362,21 @@ void test() {
   `-CompoundStatement
     |-{
     |-DeclarationStatement
-    | |-int
-    | |-a
-    | |-[
-    | |-3
-    | |-]
+    | |-SimpleDeclaration
+    | | |-int
+    | | |-a
+    | | |-[
+    | | |-UnknownExpression
+    | | | `-3
+    | | `-]
     | `-;
     |-RangeBasedForStatement
     | |-for
     | |-(
-    | |-int
-    | |-x
-    | |-:
+    | |-SimpleDeclaration
+    | | |-int
+    | | |-x
+    | | `-:
     | |-UnknownExpression
     | | `-a
     | |-)
@@ -384,7 +389,7 @@ void test() {
       // counterpart.
       {"void main() { foo: return 100; }", R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-main
   |-(
@@ -411,7 +416,7 @@ void test() {
     )cpp",
        R"txt(
 *: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
   |-void
   |-test
   |-(
@@ -444,7 +449,70 @@ void test() {
     |   | `-)
     |   `-;
     `-}
-)txt"}};
+)txt"},
+      // Multiple declarators group into a single SimpleDeclaration.
+      {R"cpp(
+      int *a, b;
+  )cpp",
+       R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+  |-int
+  |-*
+  |-a
+  |-,
+  |-b
+  `-;
+  )txt"},
+      {R"cpp(
+    typedef int *a, b;
+  )cpp",
+       R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+  |-typedef
+  |-int
+  |-*
+  |-a
+  |-,
+  |-b
+  `-;
+  )txt"},
+      // Multiple declarators inside a statement.
+      {R"cpp(
+void foo() {
+      int *a, b;
+      typedef int *ta, tb;
+}
+  )cpp",
+       R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+  |-void
+  |-foo
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-DeclarationStatement
+    | |-SimpleDeclaration
+    | | |-int
+    | | |-*
+    | | |-a
+    | | |-,
+    | | `-b
+    | `-;
+    |-DeclarationStatement
+    | |-SimpleDeclaration
+    | | |-typedef
+    | | |-int
+    | | |-*
+    | | |-ta
+    | | |-,
+    | | `-tb
+    | `-;
+    `-}
+  )txt"}};
 
   for (const auto &T : Cases) {
     auto *Root = buildTree(T.first);


        


More information about the cfe-commits mailing list