[clang] e702bdb - [Syntax] Build SimpleDeclaration node that groups multiple declarators
Ilya Biryukov via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 29 02:40:14 PST 2019
Author: Ilya Biryukov
Date: 2019-11-29T11:39:45+01:00
New Revision: e702bdb8598fcb4224f465569e7692a155c3eb3e
URL: https://github.com/llvm/llvm-project/commit/e702bdb8598fcb4224f465569e7692a155c3eb3e
DIFF: https://github.com/llvm/llvm-project/commit/e702bdb8598fcb4224f465569e7692a155c3eb3e.diff
LOG: [Syntax] Build SimpleDeclaration node that groups multiple declarators
Summary:
Also remove the temporary TopLevelDeclaration node and add
UnknownDeclaration to represent other unknown nodes.
See the follow-up change for building more top-level declarations.
Adding declarators is also pretty involved and will be done in another
follow-up patch.
Reviewers: gribozavr2
Reviewed By: gribozavr2
Subscribers: merge_guards_bot, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D70787
Added:
Modified:
clang/include/clang/Tooling/Syntax/Nodes.h
clang/lib/Tooling/Syntax/BuildTree.cpp
clang/lib/Tooling/Syntax/Nodes.cpp
clang/unittests/Tooling/Syntax/TreeTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
index c40b6bd24817..c4db4da892c2 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -37,7 +37,6 @@ namespace syntax {
enum class NodeKind : uint16_t {
Leaf,
TranslationUnit,
- TopLevelDeclaration,
// Expressions
UnknownExpression,
@@ -57,7 +56,11 @@ enum class NodeKind : uint16_t {
ReturnStatement,
RangeBasedForStatement,
ExpressionStatement,
- CompoundStatement
+ CompoundStatement,
+
+ // Declarations
+ UnknownDeclaration,
+ SimpleDeclaration,
};
/// For debugging purposes.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K);
@@ -102,20 +105,6 @@ class TranslationUnit final : public Tree {
}
};
-/// FIXME: this node is temporary and will be replaced with nodes for various
-/// 'declarations' and 'declarators' from the C/C++ grammar
-///
-/// Represents any top-level declaration. Only there to give the syntax tree a
-/// bit of structure until we implement syntax nodes for declarations and
-/// declarators.
-class TopLevelDeclaration final : public Tree {
-public:
- TopLevelDeclaration() : Tree(NodeKind::TopLevelDeclaration) {}
- static bool classof(const Node *N) {
- return N->kind() == NodeKind::TopLevelDeclaration;
- }
-};
-
/// A base class for all expressions. Note that expressions are not statements,
/// even though they are in clang.
class Expression : public Tree {
@@ -313,6 +302,38 @@ class CompoundStatement final : public Statement {
syntax::Leaf *rbrace();
};
+/// A declaration that can appear at the top-level. Note that this does *not*
+/// correspond 1-to-1 to clang::Decl. Syntax trees distinguish between top-level
+/// declarations (e.g. namespace definitions) and declarators (e.g. variables,
+/// typedefs, etc.). Declarators are stored inside SimpleDeclaration.
+class Declaration : public Tree {
+public:
+ Declaration(NodeKind K) : Tree(K) {}
+ static bool classof(const Node *N) {
+ return NodeKind::UnknownDeclaration <= N->kind() &&
+ N->kind() <= NodeKind::SimpleDeclaration;
+ }
+};
+
+/// Declaration of an unknown kind, e.g. not yet supported in syntax trees.
+class UnknownDeclaration final : public Declaration {
+public:
+ UnknownDeclaration() : Declaration(NodeKind::UnknownDeclaration) {}
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::UnknownDeclaration;
+ }
+};
+
+/// Groups multiple declarators (e.g. variables, typedefs, etc.) together. All
+/// grouped declarators share the same declaration specifiers (e.g. 'int' or
+/// 'typedef').
+class SimpleDeclaration final : public Declaration {
+public:
+ SimpleDeclaration() : Declaration(NodeKind::SimpleDeclaration) {}
+ static bool classof(const Node *N) {
+ return N->kind() == NodeKind::SimpleDeclaration;
+ }
+};
} // namespace syntax
} // namespace clang
#endif
diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp
index 22cdb89b7bfb..67081497d04c 100644
--- a/clang/lib/Tooling/Syntax/BuildTree.cpp
+++ b/clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/BuildTree.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclBase.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/Basic/LLVM.h"
@@ -56,6 +58,14 @@ class syntax::TreeBuilder {
/// Range.
void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
+ /// Must be called with the range of each `DeclaratorDecl`. Ensures the
+ /// corresponding declarator nodes are covered by `SimpleDeclaration`.
+ void noticeDeclaratorRange(llvm::ArrayRef<syntax::Token> Range);
+
+ /// Notifies that we should not consume trailing semicolon when computing
+ /// token range of \p D.
+ void noticeDeclaratorWithoutSemicolon(Decl *D);
+
/// Mark the \p Child node with a corresponding \p Role. All marked children
/// should be consumed by foldNode.
/// (!) when called on expressions (clang::Expr is derived from clang::Stmt),
@@ -94,7 +104,14 @@ class syntax::TreeBuilder {
return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
}
llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
- return getRange(D->getBeginLoc(), D->getEndLoc());
+ auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc());
+ if (llvm::isa<NamespaceDecl>(D))
+ return Tokens;
+ if (DeclsWithoutSemicolons.count(D))
+ return Tokens;
+ // FIXME: do not consume trailing semicolon on function definitions.
+ // Most declarations own a semicolon in syntax trees, but not in clang AST.
+ return withTrailingSemicolon(Tokens);
}
llvm::ArrayRef<syntax::Token> getExprRange(const Expr *E) const {
return getRange(E->getBeginLoc(), E->getEndLoc());
@@ -108,14 +125,22 @@ class syntax::TreeBuilder {
// Some statements miss a trailing semicolon, e.g. 'return', 'continue' and
// all statements that end with those. Consume this semicolon here.
- //
- // (!) statements never consume 'eof', so looking at the next token is ok.
+ if (Tokens.back().kind() == tok::semi)
+ return Tokens;
+ return withTrailingSemicolon(Tokens);
+ }
+
+private:
+ llvm::ArrayRef<syntax::Token>
+ withTrailingSemicolon(llvm::ArrayRef<syntax::Token> Tokens) const {
+ assert(!Tokens.empty());
+ assert(Tokens.back().kind() != tok::eof);
+ // (!) we never consume 'eof', so looking at the next token is ok.
if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
return Tokens;
}
-private:
/// Finds a token starting at \p L. The token must exist.
const syntax::Token *findToken(SourceLocation L) const;
@@ -136,6 +161,8 @@ class syntax::TreeBuilder {
{&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
}
+ ~Forest() { assert(DelayedFolds.empty()); }
+
void assignRole(llvm::ArrayRef<syntax::Token> Range,
syntax::NodeRole Role) {
assert(!Range.empty());
@@ -148,30 +175,46 @@ class syntax::TreeBuilder {
It->second.Role = Role;
}
- /// Add \p Node to the forest and fill its children nodes based on the \p
- /// NodeRange.
- void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
+ /// Add \p Node to the forest and attach child nodes based on \p Tokens.
+ void foldChildren(llvm::ArrayRef<syntax::Token> Tokens,
syntax::Tree *Node) {
- assert(!NodeTokens.empty());
- assert(Node->firstChild() == nullptr && "node already has children");
-
- auto *FirstToken = NodeTokens.begin();
- auto BeginChildren = Trees.lower_bound(FirstToken);
- assert(BeginChildren != Trees.end() &&
- BeginChildren->first == FirstToken &&
- "fold crosses boundaries of existing subtrees");
- auto EndChildren = Trees.lower_bound(NodeTokens.end());
- assert((EndChildren == Trees.end() ||
- EndChildren->first == NodeTokens.end()) &&
- "fold crosses boundaries of existing subtrees");
+ // Execute delayed folds inside `Tokens`.
+ auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin());
+ auto It = BeginExecuted;
+ for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It)
+ foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End),
+ It->second.Node);
+ DelayedFolds.erase(BeginExecuted, It);
+
+ // Attach children to `Node`.
+ foldChildrenEager(Tokens, Node);
+ }
- // (!) we need to go in reverse order, because we can only prepend.
- for (auto It = EndChildren; It != BeginChildren; --It)
- Node->prependChildLowLevel(std::prev(It)->second.Node,
- std::prev(It)->second.Role);
+ /// Schedule a call to `foldChildren` that will only be executed when
+ /// containing node is folded. The range of delayed nodes can be extended by
+ /// calling `extendDelayedFold`. Only one delayed node for each starting
+ /// token is allowed.
+ void foldChildrenDelayed(llvm::ArrayRef<syntax::Token> Tokens,
+ syntax::Tree *Node) {
+ assert(!Tokens.empty());
+ bool Inserted =
+ DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}})
+ .second;
+ (void)Inserted;
+ assert(Inserted && "Multiple delayed folds start at the same token");
+ }
- Trees.erase(BeginChildren, EndChildren);
- Trees.insert({FirstToken, NodeAndRole(Node)});
+ /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends
+ /// its endpoint to `ExtendedRange.end()` and returns true.
+ /// Otherwise, returns false.
+ bool extendDelayedFold(llvm::ArrayRef<syntax::Token> ExtendedRange) {
+ assert(!ExtendedRange.empty());
+ auto It = DelayedFolds.find(ExtendedRange.data());
+ if (It == DelayedFolds.end())
+ return false;
+ assert(It->second.End <= ExtendedRange.end());
+ It->second.End = ExtendedRange.end();
+ return true;
}
// EXPECTS: all tokens were consumed and are owned by a single root node.
@@ -199,6 +242,30 @@ class syntax::TreeBuilder {
}
private:
+ /// Implementation detail of `foldChildren`, does acutal folding ignoring
+ /// delayed folds.
+ void foldChildrenEager(llvm::ArrayRef<syntax::Token> Tokens,
+ syntax::Tree *Node) {
+ assert(Node->firstChild() == nullptr && "node already has children");
+
+ auto *FirstToken = Tokens.begin();
+ auto BeginChildren = Trees.lower_bound(FirstToken);
+ assert((BeginChildren == Trees.end() ||
+ BeginChildren->first == FirstToken) &&
+ "fold crosses boundaries of existing subtrees");
+ auto EndChildren = Trees.lower_bound(Tokens.end());
+ assert(
+ (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
+ "fold crosses boundaries of existing subtrees");
+
+ // (!) we need to go in reverse order, because we can only prepend.
+ for (auto It = EndChildren; It != BeginChildren; --It)
+ Node->prependChildLowLevel(std::prev(It)->second.Node,
+ std::prev(It)->second.Role);
+
+ Trees.erase(BeginChildren, EndChildren);
+ Trees.insert({FirstToken, NodeAndRole(Node)});
+ }
/// A with a role that should be assigned to it when adding to a parent.
struct NodeAndRole {
explicit NodeAndRole(syntax::Node *Node)
@@ -214,6 +281,13 @@ class syntax::TreeBuilder {
/// FIXME: storing the end tokens is redundant.
/// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
std::map<const syntax::Token *, NodeAndRole> Trees;
+
+ /// See documentation of `foldChildrenDelayed` for details.
+ struct DelayedFold {
+ const syntax::Token *End = nullptr;
+ syntax::Tree *Node = nullptr;
+ };
+ std::map<const syntax::Token *, DelayedFold> DelayedFolds;
};
/// For debugging purposes.
@@ -221,6 +295,7 @@ class syntax::TreeBuilder {
syntax::Arena &Arena;
Forest Pending;
+ llvm::DenseSet<Decl*> DeclsWithoutSemicolons;
};
namespace {
@@ -231,20 +306,30 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
bool shouldTraversePostOrder() const { return true; }
- bool TraverseDecl(Decl *D) {
- if (!D || isa<TranslationUnitDecl>(D))
- return RecursiveASTVisitor::TraverseDecl(D);
- if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
- return true; // Only build top-level decls for now, do not recurse.
- return RecursiveASTVisitor::TraverseDecl(D);
+ bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) {
+ // Ensure declarators are covered by SimpleDeclaration.
+ Builder.noticeDeclaratorRange(Builder.getRange(D));
+ // FIXME: build nodes for the declarator too.
+ return true;
+ }
+ bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) {
+ // Also a declarator.
+ Builder.noticeDeclaratorRange(Builder.getRange(D));
+ // FIXME: build nodes for the declarator too.
+ return true;
}
bool VisitDecl(Decl *D) {
- assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
- "expected a top-level decl");
assert(!D->isImplicit());
Builder.foldNode(Builder.getRange(D),
- new (allocator()) syntax::TopLevelDeclaration());
+ new (allocator()) syntax::UnknownDeclaration());
+ return true;
+ }
+
+ bool WalkUpFromTagDecl(TagDecl *C) {
+ // Avoid building UnknownDeclaration here, syntatically 'struct X {}' and
+ // similar are part of declaration specifiers and do not introduce a new
+ // top-level declaration.
return true;
}
@@ -291,7 +376,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
}
bool TraverseStmt(Stmt *S) {
- if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
+ if (auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
+ // We want to consume the semicolon, make sure SimpleDeclaration does not.
+ for (auto *D : DS->decls())
+ Builder.noticeDeclaratorWithoutSemicolon(D);
+ } else if (auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
// (!) do not recurse into subexpressions.
// we do not have syntax trees for expressions yet, so we only want to see
// the first top-level expression.
@@ -429,6 +518,18 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
Pending.foldChildren(Range, New);
}
+void syntax::TreeBuilder::noticeDeclaratorRange(
+ llvm::ArrayRef<syntax::Token> Range) {
+ if (Pending.extendDelayedFold(Range))
+ return;
+ Pending.foldChildrenDelayed(Range,
+ new (allocator()) syntax::SimpleDeclaration);
+}
+
+void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) {
+ DeclsWithoutSemicolons.insert(D);
+}
+
void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) {
if (Loc.isInvalid())
return;
diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp
index 776330ab585f..b2ed4ffa22c2 100644
--- a/clang/lib/Tooling/Syntax/Nodes.cpp
+++ b/clang/lib/Tooling/Syntax/Nodes.cpp
@@ -16,8 +16,6 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "Leaf";
case NodeKind::TranslationUnit:
return OS << "TranslationUnit";
- case NodeKind::TopLevelDeclaration:
- return OS << "TopLevelDeclaration";
case NodeKind::UnknownExpression:
return OS << "UnknownExpression";
case NodeKind::UnknownStatement:
@@ -50,6 +48,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) {
return OS << "ExpressionStatement";
case NodeKind::CompoundStatement:
return OS << "CompoundStatement";
+ case NodeKind::UnknownDeclaration:
+ return OS << "UnknownDeclaration";
+ case NodeKind::SimpleDeclaration:
+ return OS << "SimpleDeclaration";
}
llvm_unreachable("unknown node kind");
}
diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp
index c8be48b1361d..3d30a074ddd8 100644
--- a/clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -130,7 +130,7 @@ void foo() {}
)cpp",
R"txt(
*: TranslationUnit
-|-TopLevelDeclaration
+|-SimpleDeclaration
| |-int
| |-main
| |-(
@@ -138,7 +138,7 @@ void foo() {}
| `-CompoundStatement
| |-{
| `-}
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-foo
|-(
@@ -157,7 +157,7 @@ int main() {
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-int
|-main
|-(
@@ -202,7 +202,7 @@ void test() {
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -224,7 +224,7 @@ void test() {
{"void test() { int a = 10; }",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -232,16 +232,18 @@ void test() {
`-CompoundStatement
|-{
|-DeclarationStatement
- | |-int
- | |-a
- | |-=
- | |-10
+ | |-SimpleDeclaration
+ | | |-int
+ | | |-a
+ | | |-=
+ | | `-UnknownExpression
+ | | `-10
| `-;
`-}
)txt"},
{"void test() { ; }", R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -263,7 +265,7 @@ void test() {
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -299,7 +301,7 @@ void test() {
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -329,7 +331,7 @@ int test() { return 1; }
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-int
|-test
|-(
@@ -352,7 +354,7 @@ void test() {
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -360,18 +362,21 @@ void test() {
`-CompoundStatement
|-{
|-DeclarationStatement
- | |-int
- | |-a
- | |-[
- | |-3
- | |-]
+ | |-SimpleDeclaration
+ | | |-int
+ | | |-a
+ | | |-[
+ | | |-UnknownExpression
+ | | | `-3
+ | | `-]
| `-;
|-RangeBasedForStatement
| |-for
| |-(
- | |-int
- | |-x
- | |-:
+ | |-SimpleDeclaration
+ | | |-int
+ | | |-x
+ | | `-:
| |-UnknownExpression
| | `-a
| |-)
@@ -384,7 +389,7 @@ void test() {
// counterpart.
{"void main() { foo: return 100; }", R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-main
|-(
@@ -411,7 +416,7 @@ void test() {
)cpp",
R"txt(
*: TranslationUnit
-`-TopLevelDeclaration
+`-SimpleDeclaration
|-void
|-test
|-(
@@ -444,7 +449,70 @@ void test() {
| | `-)
| `-;
`-}
-)txt"}};
+)txt"},
+ // Multiple declarators group into a single SimpleDeclaration.
+ {R"cpp(
+ int *a, b;
+ )cpp",
+ R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+ |-int
+ |-*
+ |-a
+ |-,
+ |-b
+ `-;
+ )txt"},
+ {R"cpp(
+ typedef int *a, b;
+ )cpp",
+ R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+ |-typedef
+ |-int
+ |-*
+ |-a
+ |-,
+ |-b
+ `-;
+ )txt"},
+ // Multiple declarators inside a statement.
+ {R"cpp(
+void foo() {
+ int *a, b;
+ typedef int *ta, tb;
+}
+ )cpp",
+ R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+ |-void
+ |-foo
+ |-(
+ |-)
+ `-CompoundStatement
+ |-{
+ |-DeclarationStatement
+ | |-SimpleDeclaration
+ | | |-int
+ | | |-*
+ | | |-a
+ | | |-,
+ | | `-b
+ | `-;
+ |-DeclarationStatement
+ | |-SimpleDeclaration
+ | | |-typedef
+ | | |-int
+ | | |-*
+ | | |-ta
+ | | |-,
+ | | `-tb
+ | `-;
+ `-}
+ )txt"}};
for (const auto &T : Cases) {
auto *Root = buildTree(T.first);
More information about the cfe-commits
mailing list