[clang] 98aa067 - [Syntax] Start to move trivial Node class definitions to TableGen. NFC
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 11 03:41:03 PST 2020
Author: Sam McCall
Date: 2020-11-11T12:39:43+01:00
New Revision: 98aa067109ed482e428bc16e1321dbe756efc57c
URL: https://github.com/llvm/llvm-project/commit/98aa067109ed482e428bc16e1321dbe756efc57c
DIFF: https://github.com/llvm/llvm-project/commit/98aa067109ed482e428bc16e1321dbe756efc57c.diff
LOG: [Syntax] Start to move trivial Node class definitions to TableGen. NFC
This defines two node archetypes with trivial class definitions:
- Alternatives: the generated abstract classes are trivial, as all
  functionality is via LLVM RTTI.
- Unconstrained: this is a placeholder. I think all of these are going to be
  Lists, but today they have no special accessors etc., so we just say
  "could contain anything" and migrate them one-by-one to Sequence later.
Compared to Dmitri's prototype, Nodes.td looks more like a class hierarchy and
less like a grammar. (E.g. variants list the Alternatives parent rather than
vice versa).
The main reasons for this:
- the hierarchy is an important part of the API we want direct control over.
  - e.g. we may introduce abstract bases like "loop" that the grammar doesn't
    care about, in order to model is-a concepts that might make refactorings
    more expressive. This is less natural in a grammar-like idiom.
  - e.g. we're likely to have to model some alternatives as variants and others
    as class hierarchies; the choice will probably be based on natural is-a
    relationships.
- it reduces the cognitive load of switching from editing *.td to working with
  code that uses the generated classes.
Differential Revision: https://reviews.llvm.org/D90543
Added:
Modified:
clang/include/clang/Tooling/Syntax/CMakeLists.txt
clang/include/clang/Tooling/Syntax/Nodes.h
clang/include/clang/Tooling/Syntax/Nodes.td
clang/include/clang/Tooling/Syntax/Syntax.td
clang/utils/TableGen/ClangSyntaxEmitter.cpp
clang/utils/TableGen/TableGen.cpp
clang/utils/TableGen/TableGenBackends.h
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/Syntax/CMakeLists.txt b/clang/include/clang/Tooling/Syntax/CMakeLists.txt
index a740015de365..b71bcb4a9797 100644
--- a/clang/include/clang/Tooling/Syntax/CMakeLists.txt
+++ b/clang/include/clang/Tooling/Syntax/CMakeLists.txt
@@ -2,3 +2,6 @@ clang_tablegen(Nodes.inc -gen-clang-syntax-node-list
SOURCE Nodes.td
TARGET ClangSyntaxNodeList)
+clang_tablegen(NodeClasses.inc -gen-clang-syntax-node-classes
+ SOURCE Nodes.td
+ TARGET ClangSyntaxNodeClasses)
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
index ed170beb75c8..54b8c33199ff 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -114,67 +114,7 @@ enum class NodeRole : uint8_t {
/// For debugging purposes.
raw_ostream &operator<<(raw_ostream &OS, NodeRole R);
-class SimpleDeclarator;
-
-/// A root node for a translation unit. Parent is always null.
-class TranslationUnit final : public Tree {
-public:
- TranslationUnit() : Tree(NodeKind::TranslationUnit) {}
- static bool classof(const Node *N);
-};
-
-/// A base class for all expressions. Note that expressions are not statements,
-/// even though they are in clang.
-class Expression : public Tree {
-public:
- Expression(NodeKind K) : Tree(K) {}
- static bool classof(const Node *N);
-};
-
-/// A sequence of these specifiers make a `nested-name-specifier`.
-/// e.g. the `std` or `vector<int>` in `std::vector<int>::size`.
-class NameSpecifier : public Tree {
-public:
- NameSpecifier(NodeKind K) : Tree(K) {}
- static bool classof(const Node *N);
-};
-
-/// The global namespace name specifier, this specifier doesn't correspond to a
-/// token instead an absence of tokens before a `::` characterizes it, in
-/// `::std::vector<int>` it would be characterized by the absence of a token
-/// before the first `::`
-class GlobalNameSpecifier final : public NameSpecifier {
-public:
- GlobalNameSpecifier() : NameSpecifier(NodeKind::GlobalNameSpecifier) {}
- static bool classof(const Node *N);
-};
-
-/// A name specifier holding a decltype, of the form: `decltype ( expression ) `
-/// e.g. the `decltype(s)` in `decltype(s)::size`.
-class DecltypeNameSpecifier final : public NameSpecifier {
-public:
- DecltypeNameSpecifier() : NameSpecifier(NodeKind::DecltypeNameSpecifier) {}
- static bool classof(const Node *N);
-};
-
-/// A identifier name specifier, of the form `identifier`
-/// e.g. the `std` in `std::vector<int>::size`.
-class IdentifierNameSpecifier final : public NameSpecifier {
-public:
- IdentifierNameSpecifier()
- : NameSpecifier(NodeKind::IdentifierNameSpecifier) {}
- static bool classof(const Node *N);
-};
-
-/// A name specifier with a simple-template-id, of the form `template_opt
-/// identifier < template-args >` e.g. the `vector<int>` in
-/// `std::vector<int>::size`.
-class SimpleTemplateNameSpecifier final : public NameSpecifier {
-public:
- SimpleTemplateNameSpecifier()
- : NameSpecifier(NodeKind::SimpleTemplateNameSpecifier) {}
- static bool classof(const Node *N);
-};
+#include "clang/Tooling/Syntax/NodeClasses.inc"
/// Models a `nested-name-specifier`. C++ [expr.prim.id.qual]
/// e.g. the `std::vector<int>::` in `std::vector<int>::size`.
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.td b/clang/include/clang/Tooling/Syntax/Nodes.td
index bd12457df3c2..821e78e1dba5 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.td
+++ b/clang/include/clang/Tooling/Syntax/Nodes.td
@@ -17,13 +17,18 @@
include "clang/Tooling/Syntax/Syntax.td"
-def Node : External<?> {}
-def Leaf : External<Node> {}
-def Tree : External<Node> {}
+def TranslationUnit : Unconstrained {
+ let documentation = [{
+ A root node for a translation unit. Parent is always null.
+ }];
+}
-def TranslationUnit : External<Tree> {}
-
-def Expression : External<Tree> {}
+def Expression : Alternatives {
+ let documentation = [{
+ A base class for all expressions. Note that expressions are not statements,
+ even though they are in clang.
+ }];
+}
def UnknownExpression : External<Expression> {}
def UnaryOperatorExpression : External<Tree> {}
def PrefixUnaryOperatorExpression : External<UnaryOperatorExpression> {}
@@ -99,8 +104,36 @@ def CallArguments : External<List> {}
def NestedNameSpecifier : External<List> {}
// Name Specifiers.
-def NameSpecifier : External<Tree> {}
-def GlobalNameSpecifier : External<NameSpecifier> {}
-def DecltypeNameSpecifier : External<NameSpecifier> {}
-def IdentifierNameSpecifier : External<NameSpecifier> {}
-def SimpleTemplateNameSpecifier : External<NameSpecifier> {}
+def NameSpecifier : Alternatives {
+ let documentation = [{
+ A sequence of these specifiers make a `nested-name-specifier`.
+ e.g. the `std` or `vector<int>` in `std::vector<int>::size`.
+ }];
+}
+def GlobalNameSpecifier : Unconstrained<NameSpecifier> {
+ let documentation = [{
+ The global namespace name specifier, this specifier doesn't correspond to a
+ token instead an absence of tokens before a `::` characterizes it, in
+ `::std::vector<int>` it would be characterized by the absence of a token
+ before the first `::`
+ }];
+}
+def DecltypeNameSpecifier : Unconstrained<NameSpecifier> {
+ let documentation = [{
+ A name specifier holding a decltype, of the form: `decltype ( expression ) `
+ e.g. the `decltype(s)` in `decltype(s)::size`.
+ }];
+}
+def IdentifierNameSpecifier : Unconstrained<NameSpecifier> {
+ let documentation = [{
+ A identifier name specifier, of the form `identifier`
+ e.g. the `std` in `std::vector<int>::size`.
+ }];
+}
+def SimpleTemplateNameSpecifier : Unconstrained<NameSpecifier> {
+ let documentation = [{
+ A name specifier with a simple-template-id, of the form `template_opt
+ identifier < template-args >` e.g. the `vector<int>` in
+ `std::vector<int>::size`.
+ }];
+}
diff --git a/clang/include/clang/Tooling/Syntax/Syntax.td b/clang/include/clang/Tooling/Syntax/Syntax.td
index 2e1d69190e42..3decceae85f2 100644
--- a/clang/include/clang/Tooling/Syntax/Syntax.td
+++ b/clang/include/clang/Tooling/Syntax/Syntax.td
@@ -31,10 +31,28 @@
class NodeType {
// The NodeType that this node is derived from in the Node class hierarchy.
NodeType base = ?;
+ // Documentation for this Node subclass.
+ string documentation;
}
// A node type which is defined in Nodes.h rather than by generated code.
// We merely specify the inheritance hierarchy here.
class External<NodeType base_> : NodeType { let base = base_; }
-// FIXME: add sequence, list, and alternative archetypes.
+// Special nodes defined here.
+def Node : External<?> {}
+def Leaf : External<Node> {}
+def Tree : External<Node> {}
+
+// An abstract node type which merely serves as a base for more specific types.
+//
+// This corresponds to an alternative rule in the grammar, such as:
+// Statement = IfStatement | ForStatement | ...
+// Statement is modeled using Alternatives, and IfStatement.base is Statement.
+class Alternatives<NodeType base_ = Tree> : NodeType { let base = base_; }
+
+// A node type which may contain anything and has no specific accessors.
+// These are generally placeholders for a more precise implementation.
+class Unconstrained<NodeType base_ = Tree> : NodeType { let base = base_; }
+
+// FIXME: add sequence and list archetypes.
diff --git a/clang/utils/TableGen/ClangSyntaxEmitter.cpp b/clang/utils/TableGen/ClangSyntaxEmitter.cpp
index d5d61f168626..c0b09220dc73 100644
--- a/clang/utils/TableGen/ClangSyntaxEmitter.cpp
+++ b/clang/utils/TableGen/ClangSyntaxEmitter.cpp
@@ -16,19 +16,25 @@
// ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
// similar to those for AST nodes such as AST/DeclNodes.inc.
//
-// In future, the class definitions will be produced by additional backends.
+// The -gen-clang-syntax-node-classes backend produces definitions for the
+// syntax::Node subclasses (except those marked as External).
+//
+// In future, another backend will encode the structure of the various node
+// types in tables so their invariants can be checked and enforced.
//
//===----------------------------------------------------------------------===//
#include "TableGenBackends.h"
#include <deque>
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
namespace {
+using llvm::formatv;
// The class hierarchy of Node types.
// We assemble this in order to be able to define the NodeKind enum in a
@@ -41,10 +47,15 @@ class Hierarchy {
for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
link(Derived, Base);
- for (NodeType &N : AllTypes)
+ for (NodeType &N : AllTypes) {
llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
return L->Record->getName() < R->Record->getName();
});
+ // Alternatives nodes must have subclasses, External nodes may do.
+ assert(N.Record->isSubClassOf("Alternatives") ||
+ N.Record->isSubClassOf("External") || N.Derived.empty());
+ assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
+ }
}
struct NodeType {
@@ -60,6 +71,16 @@ class Hierarchy {
return *NI->second;
}
+ // Traverse the hierarchy in pre-order (base classes before derived).
+ void visit(llvm::function_ref<void(const NodeType &)> CB,
+ const NodeType *Start = nullptr) {
+ if (Start == nullptr)
+ Start = &get();
+ CB(*Start);
+ for (const NodeType *D : Start->Derived)
+ visit(CB, D);
+ }
+
private:
void add(const llvm::Record *R) {
AllTypes.emplace_back();
@@ -87,26 +108,12 @@ const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
}
-void emitNodeList(const Hierarchy::NodeType &N, llvm::raw_ostream &OS) {
- // Don't emit ABSTRACT_NODE for node itself, which has no parent.
- if (N.Base != nullptr) {
- if (N.Derived.empty())
- OS << llvm::formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
- else
- OS << llvm::formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
- N.Base->name(), firstConcrete(N).name(),
- lastConcrete(N).name());
- }
- for (const auto *C : N.Derived)
- emitNodeList(*C, OS);
-}
-
} // namespace
void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
llvm::raw_ostream &OS) {
llvm::emitSourceFileHeader("Syntax tree node list", OS);
- OS << "// Generated from " << Records.getInputFilename() << "\n";
+ Hierarchy H(Records);
OS << R"cpp(
#ifndef NODE
#define NODE(Kind, Base)
@@ -121,10 +128,77 @@ void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
#endif
)cpp";
- emitNodeList(Hierarchy(Records).get(), OS);
+ H.visit([&](const Hierarchy::NodeType &N) {
+ // Don't emit ABSTRACT_NODE for node itself, which has no parent.
+ if (N.Base == nullptr)
+ return;
+ if (N.Derived.empty())
+ OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
+ else
+ OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
+ N.Base->name(), firstConcrete(N).name(),
+ lastConcrete(N).name());
+ });
OS << R"cpp(
#undef NODE
#undef CONCRETE_NODE
#undef ABSTRACT_NODE
)cpp";
}
+
+// Format a documentation string as a C++ comment.
+// Trims leading whitespace, since comments come from a TableGen file:
+// documentation = [{
+// This is a widget. Example:
+// widget.explode()
+// }];
+// and should be formatted as:
+// /// This is a widget. Example:
+// /// widget.explode()
+// Leading and trailing whitespace lines are stripped.
+// The indentation of the first line is stripped from all lines.
+static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
+ Doc = Doc.rtrim();
+ llvm::StringRef Line;
+ while (Line.trim().empty() && !Doc.empty())
+ std::tie(Line, Doc) = Doc.split('\n');
+ llvm::StringRef Indent = Line.take_while(llvm::isSpace);
+ for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
+ Line.consume_front(Indent);
+ OS << "/// " << Line << "\n";
+ }
+}
+
+void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
+ llvm::raw_ostream &OS) {
+ llvm::emitSourceFileHeader("Syntax tree node list", OS);
+ Hierarchy H(Records);
+
+ OS << "\n// Forward-declare node types so we don't have to carefully "
+ "sequence definitions.\n";
+ H.visit([&](const Hierarchy::NodeType &N) {
+ OS << "class " << N.name() << ";\n";
+ });
+
+ OS << "\n// Node definitions\n\n";
+ H.visit([&](const Hierarchy::NodeType &N) {
+ if (N.Record->isSubClassOf("External"))
+ return;
+ printDoc(N.Record->getValueAsString("documentation"), OS);
+ OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
+ N.Derived.empty() ? " final" : "", N.Base->name());
+
+ // Constructor.
+ if (N.Derived.empty())
+ OS << formatv("public:\n {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
+ N.Base->name());
+ else
+ OS << formatv("protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
+ N.name(), N.Base->name());
+
+ // classof. FIXME: move definition inline once ~all nodes are generated.
+ OS << " static bool classof(const Node *N);\n";
+
+ OS << "};\n\n";
+ });
+}
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 12d4ba6d47d1..1b919a77988b 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -56,6 +56,7 @@ enum ActionType {
GenClangOpcodes,
GenClangSACheckers,
GenClangSyntaxNodeList,
+ GenClangSyntaxNodeClasses,
GenClangCommentHTMLTags,
GenClangCommentHTMLTagsProperties,
GenClangCommentHTMLNamedCharacterReferences,
@@ -169,6 +170,8 @@ cl::opt<ActionType> Action(
"Generate Clang Static Analyzer checkers"),
clEnumValN(GenClangSyntaxNodeList, "gen-clang-syntax-node-list",
"Generate list of Clang Syntax Tree node types"),
+ clEnumValN(GenClangSyntaxNodeClasses, "gen-clang-syntax-node-classes",
+ "Generate definitions of Clang Syntax Tree node clasess"),
clEnumValN(GenClangCommentHTMLTags, "gen-clang-comment-html-tags",
"Generate efficient matchers for HTML tag "
"names that are used in documentation comments"),
@@ -362,6 +365,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenClangSyntaxNodeList:
EmitClangSyntaxNodeList(Records, OS);
break;
+ case GenClangSyntaxNodeClasses:
+ EmitClangSyntaxNodeClasses(Records, OS);
+ break;
case GenArmNeon:
EmitNeon(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 47e552327cb1..dc4476cdff44 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -85,6 +85,8 @@ void EmitClangOpcodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
llvm::raw_ostream &OS);
+void EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
+ llvm::raw_ostream &OS);
void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
More information about the cfe-commits
mailing list