[clang-tools-extra] 3132e9c - [pseudo] Key guards by RuleID, add guards to literals (and 0).
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 21 13:42:50 PDT 2022
Author: Sam McCall
Date: 2022-07-21T22:42:31+02:00
New Revision: 3132e9cd7c9fda63f7c0babf8bd5f6d755ce9027
URL: https://github.com/llvm/llvm-project/commit/3132e9cd7c9fda63f7c0babf8bd5f6d755ce9027
DIFF: https://github.com/llvm/llvm-project/commit/3132e9cd7c9fda63f7c0babf8bd5f6d755ce9027.diff
LOG: [pseudo] Key guards by RuleID, add guards to literals (and 0).
After this, NUMERIC_CONSTANT and strings should parse only one way.
There are 8 types of literals, and 24 valid (literal, TokenKind) pairs.
This means adding 8 new named guards (or 24, if we want to assert the token).
It seems fairly clear to me at this point that the guard names are unnecessary
indirection: the guards are in fact coupled to the rule signature.
(Also add the zero guard I forgot in the previous patch.)
Differential Revision: https://reviews.llvm.org/D130066
Added:
clang-tools-extra/pseudo/test/cxx/literals.cpp
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/Language.h
clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/lib/cxx/cxx.bnf
clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
clang-tools-extra/pseudo/unittests/GrammarTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
index 410ca075a5da4..3696543915cba 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -46,7 +46,7 @@ struct Language {
LRTable Table;
// Binding extension ids to corresponding implementations.
- llvm::DenseMap<ExtensionID, RuleGuard> Guards;
+ llvm::DenseMap<RuleID, RuleGuard> Guards;
llvm::DenseMap<ExtensionID, RecoveryStrategy> RecoveryStrategies;
// FIXME: add clang::LangOptions.
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index ef22f71d801c0..a3d85aacef23e 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -28,12 +28,12 @@
// ), and an extension point corresponds to a piece of native code. For
// example, C++ grammar has a rule:
//
-// contextual-override := IDENTIFIER [guard=Override]
+// compound-statement := { statement-seq [recover=Brackets] }
//
-// GLR parser only conducts the reduction of the rule if the IDENTIFIER
-// content is `override`. This Override guard is implemented in CXX.cpp by
-// binding the ExtensionID for the `Override` value to a specific C++ function
-// that performs the check.
+// The `recover` attribute instructs the parser that we should perform error
+// recovery if parsing the statement-seq fails. The `Brackets` recovery
+// heuristic is implemented in CXX.cpp by binding the ExtensionID for the
+// `Brackets` value to a specific C++ function that finds the recovery point.
//
// Notions about the BNF grammar:
// - "_" is the start symbol of the augmented grammar;
@@ -118,11 +118,8 @@ struct Rule {
uint8_t Size : SizeBits; // Size of the Sequence
SymbolID Sequence[MaxElements];
- // A guard extension controls whether a reduction of a rule will be conducted
- // by the GLR parser.
- // 0 is sentinel unset extension ID, indicating there is no guard extension
- // being set for this rule.
- ExtensionID Guard = 0;
+ // A guarded rule has extra logic to determine whether the RHS is eligible.
+ bool Guarded = false;
// Specifies the index within Sequence eligible for error recovery.
// Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq then we
@@ -136,7 +133,7 @@ struct Rule {
return llvm::ArrayRef<SymbolID>(Sequence, Size);
}
friend bool operator==(const Rule &L, const Rule &R) {
- return L.Target == R.Target && L.seq() == R.seq() && L.Guard == R.Guard;
+ return L.Target == R.Target && L.seq() == R.seq() && L.Guarded == R.Guarded;
}
};
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index 1664089725197..df8381d04326b 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -416,11 +416,11 @@ class GLRReduce {
}
private:
- bool canReduce(ExtensionID GuardID, RuleID RID,
+ bool canReduce(const Rule &R, RuleID RID,
llvm::ArrayRef<const ForestNode *> RHS) const {
- if (!GuardID)
+ if (!R.Guarded)
return true;
- if (auto Guard = Lang.Guards.lookup(GuardID))
+ if (auto Guard = Lang.Guards.lookup(RID))
return Guard(RHS, Params.Code);
LLVM_DEBUG(llvm::dbgs()
<< llvm::formatv("missing guard implementation for rule {0}\n",
@@ -441,7 +441,7 @@ class GLRReduce {
for (const auto *B : N->parents())
llvm::dbgs() << " --> base at S" << B->State << "\n";
});
- if (!canReduce(Rule.Guard, RID, TempSequence))
+ if (!canReduce(Rule, RID, TempSequence))
return;
// Copy the chain to stable storage so it can be enqueued.
if (SequenceStorageCount == SequenceStorage.size())
@@ -572,7 +572,7 @@ class GLRReduce {
TempSequence[Rule.Size - 1 - I] = Base->Payload;
Base = Base->parents().front();
}
- if (!canReduce(Rule.Guard, *RID, TempSequence))
+ if (!canReduce(Rule, *RID, TempSequence))
return true; // reduction is not available
const ForestNode *Parsed =
&Params.Forest.createSequence(Rule.Target, *RID, TempSequence);
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 4b78a67e0be0f..8fa24bfbbd0b5 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -11,6 +11,9 @@
#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
+#include "clang/Basic/CharInfo.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/StringSwitch.h"
#include <utility>
namespace clang {
@@ -21,29 +24,88 @@ static const char *CXXBNF =
#include "CXXBNF.inc"
;
-bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- assert(RHS.size() == 1 &&
- RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
+// User-defined string literals look like `""suffix`.
+bool isStringUserDefined(const Token &Tok) {
+ return !Tok.text().endswith("\"");
}
-bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- assert(RHS.size() == 1 &&
- RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
-}
-bool guardModule(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "module";
+bool isCharUserDefined(const Token &Tok) { return !Tok.text().endswith("'"); }
+
+// Combinable flags describing numbers.
+// Clang has just one numeric_constant token kind; the grammar has 4.
+enum NumericKind {
+ Integer = 0,
+ Floating = 1 << 0,
+ UserDefined = 1 << 1,
+};
+// Determine the kind of numeric_constant we have.
+// We can assume it's something valid, as it has been lexed.
+// FIXME: is this expensive enough that we should set flags on the token
+// and reuse them rather than computing it for each guard?
+unsigned numKind(const Token &Tok) {
+ assert(Tok.Kind == tok::numeric_constant);
+ llvm::StringRef Text = Tok.text();
+ if (Text.size() <= 1)
+ return Integer;
+ bool Hex =
+ Text.size() > 2 && Text[0] == '0' && (Text[1] == 'x' || Text[1] == 'X');
+ uint8_t K = Integer;
+
+ for (char C : Text) {
+ switch (C) {
+ case '.':
+ K |= Floating;
+ break;
+ case 'e':
+ case 'E':
+ if (!Hex)
+ K |= Floating;
+ break;
+ case 'p':
+ case 'P':
+ if (Hex)
+ K |= Floating;
+ break;
+ case '_':
+ K |= UserDefined;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // We would be done here, but there are stdlib UDLs that lack _.
+ // We must distinguish these from the builtin suffixes.
+ unsigned LastLetter = Text.size();
+ while (LastLetter > 0 && isLetter(Text[LastLetter - 1]))
+ --LastLetter;
+ if (LastLetter == Text.size()) // Common case
+ return NumericKind(K);
+ // Trailing d/e/f are not part of the suffix in hex numbers.
+ while (Hex && LastLetter < Text.size() && isHexDigit(Text[LastLetter]))
+ ++LastLetter;
+ return llvm::StringSwitch<int, unsigned>(Text.substr(LastLetter))
+ // std::chrono
+ .Cases("h", "min", "s", "ms", "us", "ns", "d", "y", K | UserDefined)
+ // complex
+ .Cases("il", "i", "if", K | UserDefined)
+ .Default(K);
}
-bool guardImport(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "import";
+
+// RHS is expected to contain a single terminal.
+// Returns the corresponding token.
+const Token &onlyToken(tok::TokenKind Kind,
+ const ArrayRef<const ForestNode *> RHS,
+ const TokenStream &Tokens) {
+ assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(Kind));
+ return Tokens.tokens()[RHS.front()->startTokenIndex()];
}
-bool guardExport(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "export";
+// RHS is expected to contain a single symbol.
+// Returns the corresponding ForestNode.
+const ForestNode &onlySymbol(SymbolID Kind,
+ const ArrayRef<const ForestNode *> RHS,
+ const TokenStream &Tokens) {
+ assert(RHS.size() == 1 && RHS.front()->symbol() == Kind);
+ return *RHS.front();
}
bool isFunctionDeclarator(const ForestNode *Declarator) {
@@ -93,29 +155,92 @@ bool isFunctionDeclarator(const ForestNode *Declarator) {
}
llvm_unreachable("unreachable");
}
-bool guardFunction(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- assert(RHS.size() == 1 &&
- RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator));
- return isFunctionDeclarator(RHS.front());
-}
-bool guardNonFunction(llvm::ArrayRef<const ForestNode *> RHS,
- const TokenStream &Tokens) {
- assert(RHS.size() == 1 &&
- RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator));
- return !isFunctionDeclarator(RHS.front());
-}
llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+#define TOKEN_GUARD(kind, cond) \
+ [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \
+ const Token &Tok = onlyToken(tok::kind, RHS, Tokens); \
+ return cond; \
+ }
+#define SYMBOL_GUARD(kind, cond) \
+ [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \
+ const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens); \
+ return cond; \
+ }
return {
- {(ExtensionID)Extension::Override, guardOverride},
- {(ExtensionID)Extension::Final, guardFinal},
- {(ExtensionID)Extension::Import, guardImport},
- {(ExtensionID)Extension::Export, guardExport},
- {(ExtensionID)Extension::Module, guardModule},
- {(ExtensionID)Extension::FunctionDeclarator, guardFunction},
- {(ExtensionID)Extension::NonFunctionDeclarator, guardNonFunction},
+ {(RuleID)Rule::function_declarator_0declarator,
+ SYMBOL_GUARD(declarator, isFunctionDeclarator(&N))},
+ {(RuleID)Rule::non_function_declarator_0declarator,
+ SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))},
+
+ {(RuleID)Rule::contextual_override_0identifier,
+ TOKEN_GUARD(identifier, Tok.text() == "override")},
+ {(RuleID)Rule::contextual_final_0identifier,
+ TOKEN_GUARD(identifier, Tok.text() == "final")},
+ {(RuleID)Rule::import_keyword_0identifier,
+ TOKEN_GUARD(identifier, Tok.text() == "import")},
+ {(RuleID)Rule::export_keyword_0identifier,
+ TOKEN_GUARD(identifier, Tok.text() == "export")},
+ {(RuleID)Rule::module_keyword_0identifier,
+ TOKEN_GUARD(identifier, Tok.text() == "module")},
+ {(RuleID)Rule::contextual_zero_0numeric_constant,
+ TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
+
+ // The grammar distinguishes (only) user-defined vs plain string literals,
+ // where the clang lexer distinguishes (only) encoding types.
+ {(RuleID)Rule::user_defined_string_literal_chunk_0string_literal,
+ TOKEN_GUARD(string_literal, isStringUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_string_literal_chunk_0utf8_string_literal,
+ TOKEN_GUARD(utf8_string_literal, isStringUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_string_literal_chunk_0utf16_string_literal,
+ TOKEN_GUARD(utf16_string_literal, isStringUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_string_literal_chunk_0utf32_string_literal,
+ TOKEN_GUARD(utf32_string_literal, isStringUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_string_literal_chunk_0wide_string_literal,
+ TOKEN_GUARD(wide_string_literal, isStringUserDefined(Tok))},
+ {(RuleID)Rule::string_literal_chunk_0string_literal,
+ TOKEN_GUARD(string_literal, !isStringUserDefined(Tok))},
+ {(RuleID)Rule::string_literal_chunk_0utf8_string_literal,
+ TOKEN_GUARD(utf8_string_literal, !isStringUserDefined(Tok))},
+ {(RuleID)Rule::string_literal_chunk_0utf16_string_literal,
+ TOKEN_GUARD(utf16_string_literal, !isStringUserDefined(Tok))},
+ {(RuleID)Rule::string_literal_chunk_0utf32_string_literal,
+ TOKEN_GUARD(utf32_string_literal, !isStringUserDefined(Tok))},
+ {(RuleID)Rule::string_literal_chunk_0wide_string_literal,
+ TOKEN_GUARD(wide_string_literal, !isStringUserDefined(Tok))},
+ // And the same for chars.
+ {(RuleID)Rule::user_defined_character_literal_0char_constant,
+ TOKEN_GUARD(char_constant, isCharUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_character_literal_0utf8_char_constant,
+ TOKEN_GUARD(utf8_char_constant, isCharUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_character_literal_0utf16_char_constant,
+ TOKEN_GUARD(utf16_char_constant, isCharUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_character_literal_0utf32_char_constant,
+ TOKEN_GUARD(utf32_char_constant, isCharUserDefined(Tok))},
+ {(RuleID)Rule::user_defined_character_literal_0wide_char_constant,
+ TOKEN_GUARD(wide_char_constant, isCharUserDefined(Tok))},
+ {(RuleID)Rule::character_literal_0char_constant,
+ TOKEN_GUARD(char_constant, !isCharUserDefined(Tok))},
+ {(RuleID)Rule::character_literal_0utf8_char_constant,
+ TOKEN_GUARD(utf8_char_constant, !isCharUserDefined(Tok))},
+ {(RuleID)Rule::character_literal_0utf16_char_constant,
+ TOKEN_GUARD(utf16_char_constant, !isCharUserDefined(Tok))},
+ {(RuleID)Rule::character_literal_0utf32_char_constant,
+ TOKEN_GUARD(utf32_char_constant, !isCharUserDefined(Tok))},
+ {(RuleID)Rule::character_literal_0wide_char_constant,
+ TOKEN_GUARD(wide_char_constant, !isCharUserDefined(Tok))},
+ // clang just has one NUMERIC_CONSTANT token for {ud,plain}x{float,int}
+ {(RuleID)Rule::user_defined_integer_literal_0numeric_constant,
+ TOKEN_GUARD(numeric_constant, numKind(Tok) == (Integer | UserDefined))},
+ {(RuleID)Rule::user_defined_floating_point_literal_0numeric_constant,
+ TOKEN_GUARD(numeric_constant, numKind(Tok) == (Floating | UserDefined))},
+ {(RuleID)Rule::integer_literal_0numeric_constant,
+ TOKEN_GUARD(numeric_constant, numKind(Tok) == Integer)},
+ {(RuleID)Rule::floating_point_literal_0numeric_constant,
+ TOKEN_GUARD(numeric_constant, numKind(Tok) == Floating)},
};
+#undef TOKEN_GUARD
+#undef SYMBOL_GUARD
}
Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index 4e434b1e037cc..d49fb8fb7cf42 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -413,8 +413,8 @@ init-declarator-list := init-declarator-list , init-declarator
#! to eliminate these false parses.
init-declarator := non-function-declarator initializer_opt
init-declarator := function-declarator requires-clause_opt
-function-declarator := declarator [guard=FunctionDeclarator]
-non-function-declarator := declarator [guard=NonFunctionDeclarator]
+function-declarator := declarator [guard]
+non-function-declarator := declarator [guard]
declarator := ptr-declarator
declarator := noptr-declarator parameters-and-qualifiers trailing-return-type
ptr-declarator := noptr-declarator
@@ -715,18 +715,18 @@ literal := string-literal
literal := boolean-literal
literal := pointer-literal
literal := user-defined-literal
-integer-literal := NUMERIC_CONSTANT
-character-literal := CHAR_CONSTANT
-character-literal := WIDE_CHAR_CONSTANT
-character-literal := UTF8_CHAR_CONSTANT
-character-literal := UTF16_CHAR_CONSTANT
-character-literal := UTF32_CHAR_CONSTANT
-floating-point-literal := NUMERIC_CONSTANT
-string-literal-chunk := STRING_LITERAL
-string-literal-chunk := WIDE_STRING_LITERAL
-string-literal-chunk := UTF8_STRING_LITERAL
-string-literal-chunk := UTF16_STRING_LITERAL
-string-literal-chunk := UTF32_STRING_LITERAL
+integer-literal := NUMERIC_CONSTANT [guard]
+character-literal := CHAR_CONSTANT [guard]
+character-literal := WIDE_CHAR_CONSTANT [guard]
+character-literal := UTF8_CHAR_CONSTANT [guard]
+character-literal := UTF16_CHAR_CONSTANT [guard]
+character-literal := UTF32_CHAR_CONSTANT [guard]
+floating-point-literal := NUMERIC_CONSTANT [guard]
+string-literal-chunk := STRING_LITERAL [guard]
+string-literal-chunk := WIDE_STRING_LITERAL [guard]
+string-literal-chunk := UTF8_STRING_LITERAL [guard]
+string-literal-chunk := UTF16_STRING_LITERAL [guard]
+string-literal-chunk := UTF32_STRING_LITERAL [guard]
#! Technically, string concatenation happens at phase 6 which is before parsing,
#! so it doesn't belong to the grammar. However, we extend the grammar to
#! support it, to make the pseudoparser fully functional on practical code.
@@ -736,33 +736,33 @@ user-defined-literal := user-defined-integer-literal
user-defined-literal := user-defined-floating-point-literal
user-defined-literal := user-defined-string-literal
user-defined-literal := user-defined-character-literal
-user-defined-integer-literal := NUMERIC_CONSTANT
-user-defined-string-literal-chunk := STRING_LITERAL
-user-defined-string-literal-chunk := WIDE_STRING_LITERAL
-user-defined-string-literal-chunk := UTF8_STRING_LITERAL
-user-defined-string-literal-chunk := UTF16_STRING_LITERAL
-user-defined-string-literal-chunk := UTF32_STRING_LITERAL
+user-defined-integer-literal := NUMERIC_CONSTANT [guard]
+user-defined-string-literal-chunk := STRING_LITERAL [guard]
+user-defined-string-literal-chunk := WIDE_STRING_LITERAL [guard]
+user-defined-string-literal-chunk := UTF8_STRING_LITERAL [guard]
+user-defined-string-literal-chunk := UTF16_STRING_LITERAL [guard]
+user-defined-string-literal-chunk := UTF32_STRING_LITERAL [guard]
user-defined-string-literal := user-defined-string-literal-chunk
user-defined-string-literal := string-literal-chunk user-defined-string-literal
user-defined-string-literal := user-defined-string-literal string-literal-chunk
-user-defined-floating-point-literal := NUMERIC_CONSTANT
-user-defined-character-literal := CHAR_CONSTANT
-user-defined-character-literal := WIDE_CHAR_CONSTANT
-user-defined-character-literal := UTF8_CHAR_CONSTANT
-user-defined-character-literal := UTF16_CHAR_CONSTANT
-user-defined-character-literal := UTF32_CHAR_CONSTANT
+user-defined-floating-point-literal := NUMERIC_CONSTANT [guard]
+user-defined-character-literal := CHAR_CONSTANT [guard]
+user-defined-character-literal := WIDE_CHAR_CONSTANT [guard]
+user-defined-character-literal := UTF8_CHAR_CONSTANT [guard]
+user-defined-character-literal := UTF16_CHAR_CONSTANT [guard]
+user-defined-character-literal := UTF32_CHAR_CONSTANT [guard]
boolean-literal := FALSE
boolean-literal := TRUE
pointer-literal := NULLPTR
#! Contextual keywords -- clang lexer always lexes them as identifier tokens.
#! Placeholders for literal text in the grammar that lex as other things.
-contextual-override := IDENTIFIER [guard=Override]
-contextual-final := IDENTIFIER [guard=Final]
-contextual-zero := NUMERIC_CONSTANT [guard=Zero]
-module-keyword := IDENTIFIER [guard=Module]
-import-keyword := IDENTIFIER [guard=Import]
-export-keyword := IDENTIFIER [guard=Export]
+contextual-override := IDENTIFIER [guard]
+contextual-final := IDENTIFIER [guard]
+contextual-zero := NUMERIC_CONSTANT [guard]
+module-keyword := IDENTIFIER [guard]
+import-keyword := IDENTIFIER [guard]
+export-keyword := IDENTIFIER [guard]
#! greatergreater token -- clang lexer always lexes it as a single token, we
#! split it into two tokens to make the GLR parser aware of the nested-template
diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
index da4e2dfd7a542..7fd8c3b66b8cd 100644
--- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -86,8 +86,8 @@ std::string Grammar::dumpRule(RuleID RID) const {
if (R.RecoveryIndex == I)
OS << " [recover=" << T->AttributeValues[R.Recovery] << "]";
}
- if (R.Guard)
- OS << " [guard=" << T->AttributeValues[R.Guard] << "]";
+ if (R.Guarded)
+ OS << " [guard]";
return Result;
}
diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
index 5e1fe9b6a0086..43fba22dd52d7 100644
--- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -76,6 +76,7 @@ class GrammarBuilder {
});
// Add an empty string for the corresponding sentinel unset attribute.
T->AttributeValues.push_back("");
+ UniqueAttributeValues.erase("");
llvm::for_each(UniqueAttributeValues, [&T](llvm::StringRef Name) {
T->AttributeValues.emplace_back();
T->AttributeValues.back() = Name.str();
@@ -258,7 +259,7 @@ class GrammarBuilder {
for (unsigned I = 0; I < Spec.Sequence.size(); ++I) {
for (const auto &KV : Spec.Sequence[I].Attributes) {
if (KV.first == "guard") {
- R.Guard = LookupExtensionID(KV.second);
+ R.Guarded = true;
} else if (KV.first == "recover") {
R.Recovery = LookupExtensionID(KV.second);
R.RecoveryIndex = I;
diff --git a/clang-tools-extra/pseudo/test/cxx/literals.cpp b/clang-tools-extra/pseudo/test/cxx/literals.cpp
new file mode 100644
index 0000000000000..e1cec8985b25f
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/literals.cpp
@@ -0,0 +1,43 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest -forest-abbrev=0 | FileCheck %s --implicit-check-not=ambiguous
+auto list = {
+ 0, // CHECK: := integer-literal
+ 0b1011, // CHECK: := integer-literal
+ 0777, // CHECK: := integer-literal
+ 42_u, // CHECK: := user-defined-integer-literal
+ 0LL, // CHECK: := integer-literal
+ 0h, // CHECK: := user-defined-integer-literal
+ 0., // CHECK: := floating-point-literal
+ .2, // CHECK: := floating-point-literal
+ 2e1, // CHECK: := floating-point-literal
+ 0x42d, // CHECK: := integer-literal
+ 0x42_d, // CHECK: := user-defined-integer-literal
+ 0x42ds, // CHECK: := user-defined-integer-literal
+ 0x1.2p2,// CHECK: := floating-point-literal
+
+ "", // CHECK: literal := string-literal
+ L"", // CHECK: literal := string-literal
+ u8"", // CHECK: literal := string-literal
+ u"", // CHECK: literal := string-literal
+ U"", // CHECK: literal := string-literal
+ R"()", // CHECK: literal := string-literal
+ uR"()", // CHECK: literal := string-literal
+ "a" "b", // CHECK: literal := string-literal
+ u8"a" "b", // CHECK: literal := string-literal
+ u"a" u"b", // CHECK: literal := string-literal
+ "a"_u "b", // CHECK: user-defined-literal := user-defined-string-literal
+ "a"_u u"b", // CHECK: user-defined-literal := user-defined-string-literal
+ R"(a)" "\n", // CHECK: literal := string-literal
+ R"c(a)c"_u u"\n", // CHECK: user-defined-literal := user-defined-string-literal
+
+ 'a', // CHECK: := character-literal
+ 'abc', // CHECK: := character-literal
+ 'abcdef', // CHECK: := character-literal
+ u'a', // CHECK: := character-literal
+ U'a', // CHECK: := character-literal
+ L'a', // CHECK: := character-literal
+ L'abc', // CHECK: := character-literal
+ U'\u1234',// CHECK: := character-literal
+ '\u1234', // CHECK: := character-literal
+ u'a'_u, // CHECK: := user-defined-character-literal
+};
+
diff --git a/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp b/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
index 5a89f4d57a528..d605a3d66a5de 100644
--- a/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
+++ b/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp
@@ -5,16 +5,16 @@ auto x = { 1, .f = 2, [c]{3} };
// CHECK-NEXT: ├─{ := tok[3]
// CHECK-NEXT: ├─initializer-list
// CHECK-NEXT: │ ├─initializer-list
-// CHECK-NEXT: │ │ ├─initializer-list~literal
-// CHECK: │ │ ├─, := tok[5]
+// CHECK-NEXT: │ │ ├─initializer-list~NUMERIC_CONSTANT
+// CHECK-NEXT: │ │ ├─, := tok[5]
// CHECK-NEXT: │ │ └─initializer-list-item
// CHECK-NEXT: │ │ ├─designator
// CHECK-NEXT: │ │ │ ├─. := tok[6]
// CHECK-NEXT: │ │ │ └─IDENTIFIER := tok[7]
// CHECK-NEXT: │ │ └─brace-or-equal-initializer
// CHECK-NEXT: │ │ ├─= := tok[8]
-// CHECK-NEXT: │ │ └─initializer-clause~literal
-// CHECK: │ ├─, := tok[10]
+// CHECK-NEXT: │ │ └─initializer-clause~NUMERIC_CONSTANT
+// CHECK-NEXT: │ ├─, := tok[10]
// CHECK-NEXT: │ └─initializer-list-item
// CHECK-NEXT: │ ├─designator
// CHECK-NEXT: │ │ ├─[ := tok[11]
@@ -22,6 +22,6 @@ auto x = { 1, .f = 2, [c]{3} };
// CHECK-NEXT: │ │ └─] := tok[13]
// CHECK-NEXT: │ └─brace-or-equal-initializer~braced-init-list
// CHECK-NEXT: │ ├─{ := tok[14]
-// CHECK-NEXT: │ ├─initializer-list~literal
+// CHECK-NEXT: │ ├─initializer-list~NUMERIC_CONSTANT
// CHECK: │ └─} := tok[16]
// CHECK-NEXT: └─} := tok[17]
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 9a11db9006d1e..f4f511ff14c6d 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -45,6 +45,8 @@ static opt<bool>
desc("Strip directives and select conditional sections"));
static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
+static opt<bool> ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"),
+ init(true));
static opt<std::string> HTMLForest("html-forest",
desc("output file for HTML forest"));
static opt<std::string> StartSymbol("start-symbol",
@@ -153,7 +155,7 @@ int main(int argc, char *argv[]) {
glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
*StartSymID, Lang);
if (PrintForest)
- llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
+ llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev);
if (HTMLForest.getNumOccurrences()) {
std::error_code EC;
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 05419c1a35273..2c3ef265de392 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -631,10 +631,10 @@ TEST_F(GLRTest, GuardExtension) {
build(R"bnf(
_ := start
- start := IDENTIFIER [guard=TestOnly]
+ start := IDENTIFIER [guard]
)bnf");
TestLang.Guards.try_emplace(
- extensionID("TestOnly"),
+ ruleFor("start"),
[&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
assert(RHS.size() == 1 &&
RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
@@ -647,7 +647,7 @@ TEST_F(GLRTest, GuardExtension) {
const TokenStream &Succeeded = cook(lex(Input, LOptions), LOptions);
EXPECT_EQ(glrParse({Succeeded, Arena, GSStack}, id("start"), TestLang)
.dumpRecursive(TestLang.G),
- "[ 0, end) start := IDENTIFIER [guard=TestOnly]\n"
+ "[ 0, end) start := IDENTIFIER [guard]\n"
"[ 0, end) └─IDENTIFIER := tok[0]\n");
Input = "notest";
diff --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
index ee9a3ae2904b2..2657531ca7270 100644
--- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -102,16 +102,11 @@ TEST_F(GrammarTest, RuleIDSorted) {
TEST_F(GrammarTest, Annotation) {
build(R"bnf(
_ := x
-
- x := y [guard=value]
- y := IDENTIFIER [guard=final]
-
+ x := IDENTIFIER [guard]
)bnf");
- ASSERT_TRUE(Diags.empty());
- EXPECT_EQ(G.lookupRule(ruleFor("_")).Guard, 0);
- EXPECT_GT(G.lookupRule(ruleFor("x")).Guard, 0);
- EXPECT_GT(G.lookupRule(ruleFor("y")).Guard, 0);
- EXPECT_NE(G.lookupRule(ruleFor("x")).Guard, G.lookupRule(ruleFor("y")).Guard);
+ ASSERT_THAT(Diags, IsEmpty());
+ EXPECT_FALSE(G.lookupRule(ruleFor("_")).Guarded);
+ EXPECT_TRUE(G.lookupRule(ruleFor("x")).Guarded);
}
TEST_F(GrammarTest, MangleName) {
More information about the cfe-commits
mailing list