[clang-tools-extra] 2315358 - [pseudo] Generate an enum type for identifying grammar rules.
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 15 06:11:33 PDT 2022
Author: Haojian Wu
Date: 2022-07-15T15:09:31+02:00
New Revision: 23153589065555078cd35e2eb64bdcb711b2ec35
URL: https://github.com/llvm/llvm-project/commit/23153589065555078cd35e2eb64bdcb711b2ec35
DIFF: https://github.com/llvm/llvm-project/commit/23153589065555078cd35e2eb64bdcb711b2ec35.diff
LOG: [pseudo] Generate an enum type for identifying grammar rules.
The Rule enum type enables us to identify a grammar rule within C++'s
type system.
Differential Revision: https://reviews.llvm.org/D129359
Added:
Modified:
clang-tools-extra/pseudo/gen/Main.cpp
clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
clang-tools-extra/pseudo/unittests/GrammarTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp
index b74f3d46c3560..27857ca4b3e86 100644
--- a/clang-tools-extra/pseudo/gen/Main.cpp
+++ b/clang-tools-extra/pseudo/gen/Main.cpp
@@ -83,17 +83,19 @@ int main(int argc, char *argv[]) {
#ifndef NONTERMINAL
#define NONTERMINAL(X, Y)
#endif
+#ifndef RULE
+#define RULE(X, Y)
+#endif
#ifndef EXTENSION
#define EXTENSION(X, Y)
#endif
- )cpp";
+)cpp";
for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
- ++ID) {
- std::string Name = G.symbolName(ID).str();
- // translation-unit -> translation_unit
- std::replace(Name.begin(), Name.end(), '-', '_');
- Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
- }
+ ++ID)
+ Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", G.mangleSymbol(ID),
+ ID);
+ for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID)
+ Out.os() << llvm::formatv("RULE({0}, {1})\n", G.mangleRule(RID), RID);
for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
EID < G.table().AttributeValues.size(); ++EID) {
llvm::StringRef Name = G.table().AttributeValues[EID];
@@ -102,8 +104,9 @@ int main(int argc, char *argv[]) {
}
Out.os() << R"cpp(
#undef NONTERMINAL
+#undef RULE
#undef EXTENSION
- )cpp";
+)cpp";
break;
case EmitGrammarContent:
for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
index d2509927337ed..a1426722262e1 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -37,6 +37,12 @@ enum class Symbol : SymbolID {
#undef NONTERMINAL
};
+enum class Rule : RuleID {
+#define RULE(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef RULE
+};
+
enum class Extension : ExtensionID {
#define EXTENSION(X, Y) X = Y,
#include "CXXSymbols.inc"
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index ab11a84ebf295..ef22f71d801c0 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -165,6 +165,21 @@ class Grammar {
// Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).
llvm::StringRef symbolName(SymbolID) const;
+ // Gets the mangled name for a terminal/nonterminal.
+ // Compared to names in the grammar,
+ // nonterminal `ptr-declarator` becomes `ptr_declarator`;
+ // terminal `,` becomes `comma`;
+ // terminal `IDENTIFIER` becomes `identifier`;
+ // terminal `INT` becomes `int`;
+ // NOTE: for nonterminals, the mangled name is the same as the cxx::Symbol
+ // enumerator name; for terminals, we deliberately strip the `kw_` prefix in
+ // favor of simplicity.
+ std::string mangleSymbol(SymbolID) const;
+ // Gets the mangled name for the rule.
+ // E.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`,
+ // it is `ptr_declarator_0ptr_operator_1ptr_declarator`.
+ std::string mangleRule(RuleID) const;
+
// Lookup the SymbolID of the nonterminal symbol by Name.
llvm::Optional<SymbolID> findNonterminal(llvm::StringRef Name) const;
diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
index 149d97f28a778..da4e2dfd7a542 100644
--- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -45,6 +45,28 @@ llvm::StringRef Grammar::symbolName(SymbolID SID) const {
return T->Nonterminals[SID].Name;
}
+std::string Grammar::mangleSymbol(SymbolID SID) const {
+ static const char *const TokNames[] = {
+#define TOK(X) #X,
+#define KEYWORD(X, Y) #X,
+#include "clang/Basic/TokenKinds.def"
+ nullptr};
+ if (clang::pseudo::isToken(SID))
+ return TokNames[clang::pseudo::symbolToToken(SID)];
+ std::string Name = symbolName(SID).str();
+ // translation-unit -> translation_unit
+ std::replace(Name.begin(), Name.end(), '-', '_');
+ return Name;
+}
+
+std::string Grammar::mangleRule(RuleID RID) const {
+ const auto &R = lookupRule(RID);
+ std::string MangleName = mangleSymbol(R.Target);
+ for (size_t I = 0; I < R.seq().size(); ++I)
+ MangleName += llvm::formatv("_{0}{1}", I, mangleSymbol(R.seq()[I]));
+ return MangleName;
+}
+
llvm::Optional<SymbolID> Grammar::findNonterminal(llvm::StringRef Name) const {
auto It = llvm::partition_point(
T->Nonterminals,
diff --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
index 3f595393612da..ee9a3ae2904b2 100644
--- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -114,6 +114,21 @@ TEST_F(GrammarTest, Annotation) {
EXPECT_NE(G.lookupRule(ruleFor("x")).Guard, G.lookupRule(ruleFor("y")).Guard);
}
+TEST_F(GrammarTest, MangleName) {
+ build(R"bnf(
+ _ := declaration
+
+ declaration := ptr-declarator ;
+ ptr-declarator := * IDENTIFIER
+
+ )bnf");
+ ASSERT_TRUE(Diags.empty());
+ EXPECT_EQ(G.mangleRule(ruleFor("declaration")),
+ "declaration_0ptr_declarator_1semi");
+ EXPECT_EQ(G.mangleRule(ruleFor("ptr-declarator")),
+ "ptr_declarator_0star_1identifier");
+}
+
TEST_F(GrammarTest, Diagnostics) {
build(R"cpp(
_ := ,_opt
More information about the cfe-commits
mailing list