[clang] 98aa067 - [Syntax] Start to move trivial Node class definitions to TableGen. NFC

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 11 03:41:03 PST 2020


Author: Sam McCall
Date: 2020-11-11T12:39:43+01:00
New Revision: 98aa067109ed482e428bc16e1321dbe756efc57c

URL: https://github.com/llvm/llvm-project/commit/98aa067109ed482e428bc16e1321dbe756efc57c
DIFF: https://github.com/llvm/llvm-project/commit/98aa067109ed482e428bc16e1321dbe756efc57c.diff

LOG: [Syntax] Start to move trivial Node class definitions to TableGen. NFC

This defines two node archetypes with trivial class definitions:
 - Alternatives: the generated abstract classes are trivial as all
   functionality is via LLVM RTTI
 - Unconstrained: this is a placeholder. I think all of these are going to be
   Lists, but today they have no special accessors etc., so we just say
   "could contain anything" and will migrate them one-by-one to Sequence later.

Compared to Dmitri's prototype, Nodes.td looks more like a class hierarchy and
less like a grammar. (E.g. variants list the Alternatives parent rather than
vice versa).
The main reasons for this:
 - the hierarchy is an important part of the API we want direct control over.
   - e.g. we may introduce abstract bases like "loop" that the grammar doesn't
     care about in order to model is-a concepts that might make refactorings
     more expressive. This is less natural in a grammar-like idiom.
   - e.g. we're likely to have to model some alternatives as variants and others
     as class hierarchies; the choice will probably be based on natural is-a
     relationships.
 - it reduces the cognitive load of switching from editing *.td to working with
   code that uses the generated classes

Differential Revision: https://reviews.llvm.org/D90543

Added: 
    

Modified: 
    clang/include/clang/Tooling/Syntax/CMakeLists.txt
    clang/include/clang/Tooling/Syntax/Nodes.h
    clang/include/clang/Tooling/Syntax/Nodes.td
    clang/include/clang/Tooling/Syntax/Syntax.td
    clang/utils/TableGen/ClangSyntaxEmitter.cpp
    clang/utils/TableGen/TableGen.cpp
    clang/utils/TableGen/TableGenBackends.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Tooling/Syntax/CMakeLists.txt b/clang/include/clang/Tooling/Syntax/CMakeLists.txt
index a740015de365..b71bcb4a9797 100644
--- a/clang/include/clang/Tooling/Syntax/CMakeLists.txt
+++ b/clang/include/clang/Tooling/Syntax/CMakeLists.txt
@@ -2,3 +2,6 @@ clang_tablegen(Nodes.inc -gen-clang-syntax-node-list
   SOURCE Nodes.td
   TARGET ClangSyntaxNodeList)
 
+clang_tablegen(NodeClasses.inc -gen-clang-syntax-node-classes
+  SOURCE Nodes.td
+  TARGET ClangSyntaxNodeClasses)

diff  --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
index ed170beb75c8..54b8c33199ff 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -114,67 +114,7 @@ enum class NodeRole : uint8_t {
 /// For debugging purposes.
 raw_ostream &operator<<(raw_ostream &OS, NodeRole R);
 
-class SimpleDeclarator;
-
-/// A root node for a translation unit. Parent is always null.
-class TranslationUnit final : public Tree {
-public:
-  TranslationUnit() : Tree(NodeKind::TranslationUnit) {}
-  static bool classof(const Node *N);
-};
-
-/// A base class for all expressions. Note that expressions are not statements,
-/// even though they are in clang.
-class Expression : public Tree {
-public:
-  Expression(NodeKind K) : Tree(K) {}
-  static bool classof(const Node *N);
-};
-
-/// A sequence of these specifiers make a `nested-name-specifier`.
-/// e.g. the `std` or `vector<int>` in `std::vector<int>::size`.
-class NameSpecifier : public Tree {
-public:
-  NameSpecifier(NodeKind K) : Tree(K) {}
-  static bool classof(const Node *N);
-};
-
-/// The global namespace name specifier, this specifier doesn't correspond to a
-/// token instead an absence of tokens before a `::` characterizes it, in
-/// `::std::vector<int>` it would be characterized by the absence of a token
-/// before the first `::`
-class GlobalNameSpecifier final : public NameSpecifier {
-public:
-  GlobalNameSpecifier() : NameSpecifier(NodeKind::GlobalNameSpecifier) {}
-  static bool classof(const Node *N);
-};
-
-/// A name specifier holding a decltype, of the form: `decltype ( expression ) `
-/// e.g. the `decltype(s)` in `decltype(s)::size`.
-class DecltypeNameSpecifier final : public NameSpecifier {
-public:
-  DecltypeNameSpecifier() : NameSpecifier(NodeKind::DecltypeNameSpecifier) {}
-  static bool classof(const Node *N);
-};
-
-/// A identifier name specifier, of the form `identifier`
-/// e.g. the `std` in `std::vector<int>::size`.
-class IdentifierNameSpecifier final : public NameSpecifier {
-public:
-  IdentifierNameSpecifier()
-      : NameSpecifier(NodeKind::IdentifierNameSpecifier) {}
-  static bool classof(const Node *N);
-};
-
-/// A name specifier with a simple-template-id, of the form `template_opt
-/// identifier < template-args >` e.g. the `vector<int>` in
-/// `std::vector<int>::size`.
-class SimpleTemplateNameSpecifier final : public NameSpecifier {
-public:
-  SimpleTemplateNameSpecifier()
-      : NameSpecifier(NodeKind::SimpleTemplateNameSpecifier) {}
-  static bool classof(const Node *N);
-};
+#include "clang/Tooling/Syntax/NodeClasses.inc"
 
 /// Models a `nested-name-specifier`. C++ [expr.prim.id.qual]
 /// e.g. the `std::vector<int>::` in `std::vector<int>::size`.

diff  --git a/clang/include/clang/Tooling/Syntax/Nodes.td b/clang/include/clang/Tooling/Syntax/Nodes.td
index bd12457df3c2..821e78e1dba5 100644
--- a/clang/include/clang/Tooling/Syntax/Nodes.td
+++ b/clang/include/clang/Tooling/Syntax/Nodes.td
@@ -17,13 +17,18 @@
 
 include "clang/Tooling/Syntax/Syntax.td"
 
-def Node : External<?> {}
-def Leaf : External<Node> {}
-def Tree : External<Node> {}
+def TranslationUnit : Unconstrained {
+  let documentation = [{
+    A root node for a translation unit. Parent is always null.
+  }];
+}
 
-def TranslationUnit : External<Tree> {}
-
-def Expression : External<Tree> {}
+def Expression : Alternatives {
+  let documentation = [{
+    A base class for all expressions. Note that expressions are not statements,
+    even though they are in clang.
+  }];
+}
 def UnknownExpression : External<Expression> {}
 def UnaryOperatorExpression : External<Tree> {}
 def PrefixUnaryOperatorExpression : External<UnaryOperatorExpression> {}
@@ -99,8 +104,36 @@ def CallArguments : External<List> {}
 def NestedNameSpecifier : External<List> {}
 
 // Name Specifiers.
-def NameSpecifier : External<Tree> {}
-def GlobalNameSpecifier : External<NameSpecifier> {}
-def DecltypeNameSpecifier : External<NameSpecifier> {}
-def IdentifierNameSpecifier : External<NameSpecifier> {}
-def SimpleTemplateNameSpecifier : External<NameSpecifier> {}
+def NameSpecifier : Alternatives {
+  let documentation = [{
+    A sequence of these specifiers make a `nested-name-specifier`.
+    e.g. the `std` or `vector<int>` in `std::vector<int>::size`.
+  }];
+}
+def GlobalNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    The global namespace name specifier, this specifier doesn't correspond to a
+    token instead an absence of tokens before a `::` characterizes it, in
+    `::std::vector<int>` it would be characterized by the absence of a token
+    before the first `::`
+  }];
+}
+def DecltypeNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    A name specifier holding a decltype, of the form: `decltype ( expression ) `
+    e.g. the `decltype(s)` in `decltype(s)::size`.
+  }];
+}
+def IdentifierNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    A identifier name specifier, of the form `identifier`
+    e.g. the `std` in `std::vector<int>::size`.
+  }];
+}
+def SimpleTemplateNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    A name specifier with a simple-template-id, of the form `template_opt
+    identifier < template-args >` e.g. the `vector<int>` in
+    `std::vector<int>::size`.
+  }];
+}

diff  --git a/clang/include/clang/Tooling/Syntax/Syntax.td b/clang/include/clang/Tooling/Syntax/Syntax.td
index 2e1d69190e42..3decceae85f2 100644
--- a/clang/include/clang/Tooling/Syntax/Syntax.td
+++ b/clang/include/clang/Tooling/Syntax/Syntax.td
@@ -31,10 +31,28 @@
 class NodeType {
   // The NodeType that this node is derived from in the Node class hierarchy.
   NodeType base = ?;
+  // Documentation for this Node subclass.
+  string documentation;
 }
 
 // A node type which is defined in Nodes.h rather than by generated code.
 // We merely specify the inheritance hierarchy here.
 class External<NodeType base_> : NodeType { let base = base_; }
 
-// FIXME: add sequence, list, and alternative archetypes.
+// Special nodes defined here.
+def Node : External<?> {}
+def Leaf : External<Node> {}
+def Tree : External<Node> {}
+
+// An abstract node type which merely serves as a base for more specific types.
+//
+// This corresponds to an alternative rule in the grammar, such as:
+//   Statement = IfStatement | ForStatement | ...
+// Statement is modeled using Alternatives, and IfStatement.base is Statement.
+class Alternatives<NodeType base_ = Tree> : NodeType { let base = base_; }
+
+// A node type which may contain anything and has no specific accessors.
+// These are generally placeholders for a more precise implementation.
+class Unconstrained<NodeType base_ = Tree> : NodeType { let base = base_; }
+
+// FIXME: add sequence and list archetypes.

diff  --git a/clang/utils/TableGen/ClangSyntaxEmitter.cpp b/clang/utils/TableGen/ClangSyntaxEmitter.cpp
index d5d61f168626..c0b09220dc73 100644
--- a/clang/utils/TableGen/ClangSyntaxEmitter.cpp
+++ b/clang/utils/TableGen/ClangSyntaxEmitter.cpp
@@ -16,19 +16,25 @@
 //   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
 // similar to those for AST nodes such as AST/DeclNodes.inc.
 //
-// In future, the class definitions will be produced by additional backends.
+// The -gen-clang-syntax-node-classes backend produces definitions for the
+// syntax::Node subclasses (except those marked as External).
+//
+// In future, another backend will encode the structure of the various node
+// types in tables so their invariants can be checked and enforced.
 //
 //===----------------------------------------------------------------------===//
 #include "TableGenBackends.h"
 
 #include <deque>
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
 namespace {
+using llvm::formatv;
 
 // The class hierarchy of Node types.
 // We assemble this in order to be able to define the NodeKind enum in a
@@ -41,10 +47,15 @@ class Hierarchy {
     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
         link(Derived, Base);
-    for (NodeType &N : AllTypes)
+    for (NodeType &N : AllTypes) {
       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
         return L->Record->getName() < R->Record->getName();
       });
+      // Alternatives nodes must have subclasses, External nodes may do.
+      assert(N.Record->isSubClassOf("Alternatives") ||
+             N.Record->isSubClassOf("External") || N.Derived.empty());
+      assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
+    }
   }
 
   struct NodeType {
@@ -60,6 +71,16 @@ class Hierarchy {
     return *NI->second;
   }
 
+  // Traverse the hierarchy in pre-order (base classes before derived).
+  void visit(llvm::function_ref<void(const NodeType &)> CB,
+             const NodeType *Start = nullptr) {
+    if (Start == nullptr)
+      Start = &get();
+    CB(*Start);
+    for (const NodeType *D : Start->Derived)
+      visit(CB, D);
+  }
+
 private:
   void add(const llvm::Record *R) {
     AllTypes.emplace_back();
@@ -87,26 +108,12 @@ const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
   return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
 }
 
-void emitNodeList(const Hierarchy::NodeType &N, llvm::raw_ostream &OS) {
-  // Don't emit ABSTRACT_NODE for node itself, which has no parent.
-  if (N.Base != nullptr) {
-    if (N.Derived.empty())
-      OS << llvm::formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
-    else
-      OS << llvm::formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
-                          N.Base->name(), firstConcrete(N).name(),
-                          lastConcrete(N).name());
-  }
-  for (const auto *C : N.Derived)
-    emitNodeList(*C, OS);
-}
-
 } // namespace
 
 void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
                                     llvm::raw_ostream &OS) {
   llvm::emitSourceFileHeader("Syntax tree node list", OS);
-  OS << "// Generated from " << Records.getInputFilename() << "\n";
+  Hierarchy H(Records);
   OS << R"cpp(
 #ifndef NODE
 #define NODE(Kind, Base)
@@ -121,10 +128,77 @@ void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
 #endif
 
 )cpp";
-  emitNodeList(Hierarchy(Records).get(), OS);
+  H.visit([&](const Hierarchy::NodeType &N) {
+    // Don't emit ABSTRACT_NODE for node itself, which has no parent.
+    if (N.Base == nullptr)
+      return;
+    if (N.Derived.empty())
+      OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
+    else
+      OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
+                    N.Base->name(), firstConcrete(N).name(),
+                    lastConcrete(N).name());
+  });
   OS << R"cpp(
 #undef NODE
 #undef CONCRETE_NODE
 #undef ABSTRACT_NODE
 )cpp";
 }
+
+// Format a documentation string as a C++ comment.
+// Handles leading whitespace, since the text comes from a TableGen file:
+//    documentation = [{
+//      This is a widget. Example:
+//        widget.explode()
+//    }];
+// and should be formatted as:
+//    /// This is a widget. Example:
+//    ///   widget.explode()
+// Leading and trailing whitespace lines are stripped.
+// The indentation of the first line is stripped from all lines.
+static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
+  Doc = Doc.rtrim();
+  llvm::StringRef Line;
+  while (Line.trim().empty() && !Doc.empty())
+    std::tie(Line, Doc) = Doc.split('\n');
+  llvm::StringRef Indent = Line.take_while(llvm::isSpace);
+  for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
+    Line.consume_front(Indent);
+    OS << "/// " << Line << "\n";
+  }
+}
+
+void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
+                                       llvm::raw_ostream &OS) {
+  llvm::emitSourceFileHeader("Syntax tree node list", OS);
+  Hierarchy H(Records);
+
+  OS << "\n// Forward-declare node types so we don't have to carefully "
+        "sequence definitions.\n";
+  H.visit([&](const Hierarchy::NodeType &N) {
+    OS << "class " << N.name() << ";\n";
+  });
+
+  OS << "\n// Node definitions\n\n";
+  H.visit([&](const Hierarchy::NodeType &N) {
+    if (N.Record->isSubClassOf("External"))
+      return;
+    printDoc(N.Record->getValueAsString("documentation"), OS);
+    OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
+                  N.Derived.empty() ? " final" : "", N.Base->name());
+
+    // Constructor.
+    if (N.Derived.empty())
+      OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
+                    N.Base->name());
+    else
+      OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
+                    N.name(), N.Base->name());
+
+    // classof. FIXME: move definition inline once ~all nodes are generated.
+    OS << "  static bool classof(const Node *N);\n";
+
+    OS << "};\n\n";
+  });
+}

diff  --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 12d4ba6d47d1..1b919a77988b 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -56,6 +56,7 @@ enum ActionType {
   GenClangOpcodes,
   GenClangSACheckers,
   GenClangSyntaxNodeList,
+  GenClangSyntaxNodeClasses,
   GenClangCommentHTMLTags,
   GenClangCommentHTMLTagsProperties,
   GenClangCommentHTMLNamedCharacterReferences,
@@ -169,6 +170,8 @@ cl::opt<ActionType> Action(
                    "Generate Clang Static Analyzer checkers"),
         clEnumValN(GenClangSyntaxNodeList, "gen-clang-syntax-node-list",
                    "Generate list of Clang Syntax Tree node types"),
+        clEnumValN(GenClangSyntaxNodeClasses, "gen-clang-syntax-node-classes",
+                   "Generate definitions of Clang Syntax Tree node clasess"),
         clEnumValN(GenClangCommentHTMLTags, "gen-clang-comment-html-tags",
                    "Generate efficient matchers for HTML tag "
                    "names that are used in documentation comments"),
@@ -362,6 +365,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenClangSyntaxNodeList:
     EmitClangSyntaxNodeList(Records, OS);
     break;
+  case GenClangSyntaxNodeClasses:
+    EmitClangSyntaxNodeClasses(Records, OS);
+    break;
   case GenArmNeon:
     EmitNeon(Records, OS);
     break;

diff  --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index 47e552327cb1..dc4476cdff44 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -85,6 +85,8 @@ void EmitClangOpcodes(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 
 void EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
+void EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
+                                llvm::raw_ostream &OS);
 
 void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);


        


More information about the cfe-commits mailing list