[clang-tools-extra] 2a88fb2 - [pseudo] Eliminate the dangling-else syntax ambiguity.
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 22 00:13:29 PDT 2022
Author: Haojian Wu
Date: 2022-07-22T09:13:09+02:00
New Revision: 2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d
URL: https://github.com/llvm/llvm-project/commit/2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d
DIFF: https://github.com/llvm/llvm-project/commit/2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d.diff
LOG: [pseudo] Eliminate the dangling-else syntax ambiguity.
- The grammar ambiguity is eliminated by a guard.
- The guard function signature is changed: all parameters are now folded
into a single object to avoid a long parameter list (more parameters
will be added in the near future).
Reviewed By: sammccall
Differential Revision: https://reviews.llvm.org/D130160
Added:
clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/Language.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/lib/cxx/cxx.bnf
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
index 3696543915cba..1a2b71f081da0 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -19,6 +19,12 @@ class ForestNode;
class TokenStream;
class LRTable;
+struct GuardParams {
+ llvm::ArrayRef<const ForestNode *> RHS;
+ const TokenStream &Tokens;
+ // FIXME: use the index of Tokens.
+ SymbolID Lookahead;
+};
// A guard restricts when a grammar rule can be used.
//
// The GLR parser will use the guard to determine whether a rule reduction will
@@ -26,8 +32,7 @@ class LRTable;
// `virt-specifier := IDENTIFIER` only if the identifier's text is 'override'.
//
// Return true if the guard is satisfied.
-using RuleGuard = llvm::function_ref<bool(
- llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
+using RuleGuard = llvm::function_ref<bool(const GuardParams &)>;
// A recovery strategy determines a region of code to skip when parsing fails.
//
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index df8381d04326b..ab230accdf8f8 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -421,7 +421,7 @@ class GLRReduce {
if (!R.Guarded)
return true;
if (auto Guard = Lang.Guards.lookup(RID))
- return Guard(RHS, Params.Code);
+ return Guard({RHS, Params.Code, Lookahead});
LLVM_DEBUG(llvm::dbgs()
<< llvm::formatv("missing guard implementation for rule {0}\n",
Lang.G.dumpRule(RID)));
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 8fa24bfbbd0b5..7fc3a48d63189 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -156,15 +156,19 @@ bool isFunctionDeclarator(const ForestNode *Declarator) {
llvm_unreachable("unreachable");
}
+bool guardNextTokenNotElse(const GuardParams &P) {
+ return symbolToToken(P.Lookahead) != tok::kw_else;
+}
+
llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
#define TOKEN_GUARD(kind, cond) \
- [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \
- const Token &Tok = onlyToken(tok::kind, RHS, Tokens); \
+ [](const GuardParams& P) { \
+ const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \
return cond; \
}
#define SYMBOL_GUARD(kind, cond) \
- [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \
- const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens); \
+ [](const GuardParams& P) { \
+ const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, P.RHS, P.Tokens); \
return cond; \
}
return {
@@ -186,6 +190,11 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
{(RuleID)Rule::contextual_zero_0numeric_constant,
TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
+ {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+ guardNextTokenNotElse},
+ {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+ guardNextTokenNotElse},
+
// The grammar distinguishes (only) user-defined vs plain string literals,
// where the clang lexer distinguishes (only) encoding types.
{(RuleID)Rule::user_defined_string_literal_chunk_0string_literal,
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index d49fb8fb7cf42..8138d0fd481ed 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -290,7 +290,7 @@ expression-statement := expression_opt ;
compound-statement := { statement-seq_opt [recover=Brackets] }
statement-seq := statement
statement-seq := statement-seq statement
-selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
+selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement [guard]
selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement
selection-statement := SWITCH ( init-statement_opt condition ) statement
iteration-statement := WHILE ( condition ) statement
diff --git a/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
new file mode 100644
index 0000000000000..151f3931b53f9
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
@@ -0,0 +1,22 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s
+
+// Verify that the else belongs to the nested if statement.
+if (true) if (true) {} else {}
+
+// CHECK: statement-seq~selection-statement := IF ( condition ) statement
+// CHECK-NEXT: ├─IF
+// CHECK-NEXT: ├─(
+// CHECK-NEXT: ├─condition~TRUE
+// CHECK-NEXT: ├─)
+// CHECK-NEXT: └─statement~selection-statement
+// CHECK-NEXT: ├─IF
+// CHECK-NEXT: ├─(
+// CHECK-NEXT: ├─condition~TRUE
+// CHECK-NEXT: ├─)
+// CHECK-NEXT: ├─statement~compound-statement := { }
+// CHECK-NEXT: │ ├─{
+// CHECK-NEXT: │ └─}
+// CHECK-NEXT: ├─ELSE
+// CHECK-NEXT: └─statement~compound-statement := { }
+// CHECK-NEXT: ├─{
+// CHECK-NEXT: └─}
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 2c3ef265de392..5f87efec67044 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -634,11 +634,12 @@ TEST_F(GLRTest, GuardExtension) {
start := IDENTIFIER [guard]
)bnf");
TestLang.Guards.try_emplace(
- ruleFor("start"),
- [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
- assert(RHS.size() == 1 &&
- RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
+ ruleFor("start"), [&](const GuardParams &P) {
+ assert(P.RHS.size() == 1 &&
+ P.RHS.front()->symbol() ==
+ tokenSymbol(clang::tok::identifier));
+ return P.Tokens.tokens()[P.RHS.front()->startTokenIndex()]
+ .text() == "test";
});
clang::LangOptions LOptions;
TestLang.Table = LRTable::buildSLR(TestLang.G);
More information about the cfe-commits mailing list