[clang-tools-extra] 2315358 - [pseudo] Generate an enum type for identifying grammar rules.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Fri Jul 15 06:11:33 PDT 2022


Author: Haojian Wu
Date: 2022-07-15T15:09:31+02:00
New Revision: 23153589065555078cd35e2eb64bdcb711b2ec35

URL: https://github.com/llvm/llvm-project/commit/23153589065555078cd35e2eb64bdcb711b2ec35
DIFF: https://github.com/llvm/llvm-project/commit/23153589065555078cd35e2eb64bdcb711b2ec35.diff

LOG: [pseudo] Generate an enum type for identifying grammar rules.

The Rule enum type enables us to identify a grammar rule within C++'s
type system.

Differential Revision: https://reviews.llvm.org/D129359

Added: 
    

Modified: 
    clang-tools-extra/pseudo/gen/Main.cpp
    clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
    clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
    clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
    clang-tools-extra/pseudo/unittests/GrammarTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp
index b74f3d46c3560..27857ca4b3e86 100644
--- a/clang-tools-extra/pseudo/gen/Main.cpp
+++ b/clang-tools-extra/pseudo/gen/Main.cpp
@@ -83,17 +83,19 @@ int main(int argc, char *argv[]) {
 #ifndef NONTERMINAL
 #define NONTERMINAL(X, Y)
 #endif
+#ifndef RULE
+#define RULE(X, Y)
+#endif
 #ifndef EXTENSION
 #define EXTENSION(X, Y)
 #endif
-    )cpp";
+)cpp";
     for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
-         ++ID) {
-      std::string Name = G.symbolName(ID).str();
-      // translation-unit -> translation_unit
-      std::replace(Name.begin(), Name.end(), '-', '_');
-      Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
-    }
+         ++ID)
+      Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", G.mangleSymbol(ID),
+                                ID);
+    for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID)
+      Out.os() << llvm::formatv("RULE({0}, {1})\n", G.mangleRule(RID), RID);
     for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
          EID < G.table().AttributeValues.size(); ++EID) {
       llvm::StringRef Name = G.table().AttributeValues[EID];
@@ -102,8 +104,9 @@ int main(int argc, char *argv[]) {
     }
     Out.os() << R"cpp(
 #undef NONTERMINAL
+#undef RULE
 #undef EXTENSION
-    )cpp";
+)cpp";
     break;
   case EmitGrammarContent:
     for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
index d2509927337ed..a1426722262e1 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -37,6 +37,12 @@ enum class Symbol : SymbolID {
 #undef NONTERMINAL
 };
 
+enum class Rule : RuleID {
+#define RULE(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef RULE
+};
+
 enum class Extension : ExtensionID {
 #define EXTENSION(X, Y) X = Y,
 #include "CXXSymbols.inc"

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index ab11a84ebf295..ef22f71d801c0 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -165,6 +165,21 @@ class Grammar {
   // Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).
   llvm::StringRef symbolName(SymbolID) const;
 
+  // Gets the mangled name for a terminal/nonterminal.
+  // Compared to names in the grammar,
+  //   nonterminal `ptr-declarator` becomes `ptr_declarator`;
+  //   terminal `,` becomes `comma`;
+  //   terminal `IDENTIFIER` becomes `identifier`;
+  //   terminal `INT` becomes `int`;
+  // NOTE: for nonterminals, the mangled name is the same as the cxx::Symbol
+  // enumerator name; for terminals, we deliberately strip the `kw_` prefix in
+  // favor of simplicity.
+  std::string mangleSymbol(SymbolID) const;
+  // Gets the mangled name for the rule.
+  // E.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
+  // it is `ptr_declarator_0ptr_operator_1ptr_declarator`.
+  std::string mangleRule(RuleID) const;
+
   // Lookup the SymbolID of the nonterminal symbol by Name.
   llvm::Optional<SymbolID> findNonterminal(llvm::StringRef Name) const;
 

diff  --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
index 149d97f28a778..da4e2dfd7a542 100644
--- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -45,6 +45,28 @@ llvm::StringRef Grammar::symbolName(SymbolID SID) const {
   return T->Nonterminals[SID].Name;
 }
 
+std::string Grammar::mangleSymbol(SymbolID SID) const {
+  static const char *const TokNames[] = {
+#define TOK(X) #X,
+#define KEYWORD(X, Y) #X,
+#include "clang/Basic/TokenKinds.def"
+      nullptr};
+  if (clang::pseudo::isToken(SID))
+    return TokNames[clang::pseudo::symbolToToken(SID)];
+  std::string Name = symbolName(SID).str();
+  // translation-unit -> translation_unit
+  std::replace(Name.begin(), Name.end(), '-', '_');
+  return Name;
+}
+
+std::string Grammar::mangleRule(RuleID RID) const {
+  const auto &R = lookupRule(RID);
+  std::string MangleName = mangleSymbol(R.Target);
+  for (size_t I = 0; I < R.seq().size(); ++I)
+    MangleName += llvm::formatv("_{0}{1}", I, mangleSymbol(R.seq()[I]));
+  return MangleName;
+}
+
 llvm::Optional<SymbolID> Grammar::findNonterminal(llvm::StringRef Name) const {
   auto It = llvm::partition_point(
       T->Nonterminals,

diff  --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
index 3f595393612da..ee9a3ae2904b2 100644
--- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -114,6 +114,21 @@ TEST_F(GrammarTest, Annotation) {
   EXPECT_NE(G.lookupRule(ruleFor("x")).Guard, G.lookupRule(ruleFor("y")).Guard);
 }
 
+TEST_F(GrammarTest, MangleName) {
+  build(R"bnf(
+    _ := declaration
+
+    declaration := ptr-declarator ;
+    ptr-declarator := * IDENTIFIER
+
+  )bnf");
+  ASSERT_TRUE(Diags.empty());
+  EXPECT_EQ(G.mangleRule(ruleFor("declaration")),
+            "declaration_0ptr_declarator_1semi");
+  EXPECT_EQ(G.mangleRule(ruleFor("ptr-declarator")),
+            "ptr_declarator_0star_1identifier");
+}
+
 TEST_F(GrammarTest, Diagnostics) {
   build(R"cpp(
     _ := ,_opt


        


More information about the cfe-commits mailing list