[clang] e1db505 - [syntax][pseudo] Introduce the C++ spec grammar.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 4 02:59:12 PST 2022


Author: Haojian Wu
Date: 2022-02-04T11:58:50+01:00
New Revision: e1db505b42f4d0799640988bbdf1ab83a661a46c

URL: https://github.com/llvm/llvm-project/commit/e1db505b42f4d0799640988bbdf1ab83a661a46c
DIFF: https://github.com/llvm/llvm-project/commit/e1db505b42f4d0799640988bbdf1ab83a661a46c.diff

LOG: [syntax][pseudo] Introduce the C++ spec grammar.

Add a dummy clang-pseudo tool (right now it accepts and parses the
grammar file).

Differential Revision: https://reviews.llvm.org/D115856

Added: 
    clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
    clang/test/Syntax/check-cxx-bnf.test
    clang/test/Syntax/lit.local.cfg
    clang/tools/clang-pseudo/CMakeLists.txt
    clang/tools/clang-pseudo/ClangPseudo.cpp

Modified: 
    clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
    clang/test/CMakeLists.txt
    clang/tools/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
index 40181e049f253..cf3e3e10ec540 100644
--- a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
+++ b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
@@ -92,7 +92,7 @@ class GrammarBuilder {
       return It->second;
     };
     for (const auto &Spec : Specs) {
-      assert(Spec.Sequence.size() < Rule::MaxElements);
+      assert(Spec.Sequence.size() <= Rule::MaxElements);
       Symbols.clear();
       for (const RuleSpec::Element &Elt : Spec.Sequence)
         Symbols.push_back(Lookup(Elt.Symbol));

diff  --git a/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
new file mode 100644
index 0000000000000..ee83abfa00626
--- /dev/null
+++ b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
@@ -0,0 +1,739 @@
+# This is a C++ grammar from the C++ standard [1].
+#
+# The grammar is a superset of the true grammar requiring semantic constraints to
+# resolve ambiguities. The grammar is context-free and ambiguous (beyond the
+# limit of LR(k)). We use a general parsing algorithm (e.g. GLR) to handle the
+# grammar and generate a transition table which is used to drive the parsing.
+#
+# It aims to align with the ISO C++ grammar as much as possible. We adjust it
+# to fit the need for the grammar-based parser:
+#  - attributes are omitted, which will be handled as comments;
+#  - we don't allow nullable non-terminal symbols. There are a few nullable
+#    non-terminals in the spec grammar, which are adjusted to be non-nullable;
+#  - the file merely describes the core C++ grammar. Preprocessor directives and
+#    lexical conversions are omitted as we reuse clang's lexer and run a fake
+#    preprocessor;
+#
+# Guidelines:
+#   - non-terminals are lower_case; terminals (aka tokens) correspond to
+#     clang::TokenKind, written as "IDENTIFIER", "USING", "::" etc;
+#   - optional symbols are supported, with a _opt suffix;
+#
+# [1] https://isocpp.org/files/papers/N4860.pdf
+#
+#
+# _ serves as a "fake" start symbol, coming with real grammar symbols.
+_ := translation-unit
+
+# gram.key
+typedef-name := IDENTIFIER
+typedef-name := simple-template-id
+namespace-name := IDENTIFIER
+namespace-name := namespace-alias
+namespace-alias := IDENTIFIER
+class-name := IDENTIFIER
+class-name := simple-template-id
+enum-name := IDENTIFIER
+template-name := IDENTIFIER
+
+# gram.basic
+#! Custom modifications to eliminate optional declaration-seq
+translation-unit := declaration-seq
+translation-unit := global-module-fragment_opt module-declaration declaration-seq_opt private-module-fragment_opt
+
+# gram.expr
+# expr.prim
+primary-expression := literal
+primary-expression := THIS
+primary-expression := ( expression )
+primary-expression := id-expression
+primary-expression := lambda-expression
+primary-expression := fold-expression
+primary-expression := requires-expression
+id-expression := unqualified-id
+id-expression := qualified-id
+unqualified-id := IDENTIFIER
+unqualified-id := operator-function-id
+unqualified-id := conversion-function-id
+unqualified-id := literal-operator-id
+unqualified-id := ~ type-name
+unqualified-id := ~ decltype-specifier
+unqualified-id := template-id
+qualified-id := nested-name-specifier TEMPLATE_opt unqualified-id
+nested-name-specifier := ::
+nested-name-specifier := type-name ::
+nested-name-specifier := namespace-name ::
+nested-name-specifier := decltype-specifier ::
+nested-name-specifier := nested-name-specifier IDENTIFIER ::
+nested-name-specifier := nested-name-specifier TEMPLATE_opt simple-template-id ::
+lambda-expression := lambda-introducer lambda-declarator_opt compound-statement
+lambda-expression := lambda-introducer < template-parameter-list > requires-clause_opt lambda-declarator_opt compound-statement
+lambda-introducer := [ lambda-capture_opt ]
+lambda-declarator := ( parameter-declaration-clause_opt ) decl-specifier-seq_opt noexcept-specifier_opt trailing-return-type_opt requires-clause_opt
+lambda-capture := capture-default
+lambda-capture := capture-list
+lambda-capture := capture-default , capture-list
+capture-default := &
+capture-default := =
+capture-list := capture
+capture-list := capture-list , capture
+capture := simple-capture
+capture := init-capture
+simple-capture := IDENTIFIER ..._opt
+simple-capture := & IDENTIFIER ..._opt
+simple-capture := THIS
+simple-capture := * THIS
+init-capture := ..._opt IDENTIFIER initializer
+init-capture := & ..._opt IDENTIFIER initializer
+fold-expression := ( cast-expression fold-operator ... )
+fold-expression := ( ... fold-operator cast-expression )
+fold-expression := ( cast-expression fold-operator ... fold-operator cast-expression )
+fold-operator := +
+fold-operator := -
+fold-operator := *
+fold-operator := /
+fold-operator := %
+fold-operator := ^
+fold-operator := |
+fold-operator := <<
+fold-operator := >>
+fold-operator := +=
+fold-operator := -=
+fold-operator := *=
+fold-operator := /=
+fold-operator := %=
+fold-operator := ^=
+fold-operator := &=
+fold-operator := |=
+fold-operator := <<=
+fold-operator := >>=
+fold-operator := =
+fold-operator := ==
+fold-operator := !=
+fold-operator := <
+fold-operator := >
+fold-operator := <=
+fold-operator := >=
+fold-operator := &&
+fold-operator := ||
+fold-operator := ,
+fold-operator := .*
+fold-operator := ->*
+requires-expression := REQUIRES requirement-parameter-list_opt requirement-body
+requirement-parameter-list := ( parameter-declaration-clause_opt )
+requirement-body := { requirement-seq }
+requirement-seq := requirement
+requirement-seq := requirement-seq requirement
+requirement := simple-requirement
+requirement := type-requirement
+requirement := compound-requirement
+requirement := nested-requirement
+simple-requirement := expression ;
+type-requirement := TYPENAME nested-name-specifier_opt type-name ;
+compound-requirement := { expression } NOEXCEPT_opt return-type-requirement_opt ;
+return-type-requirement := -> type-constraint
+nested-requirement := REQUIRES constraint-expression ;
+# expr.post
+postfix-expression := primary-expression
+postfix-expression := postfix-expression [ expr-or-braced-init-list ]
+postfix-expression := postfix-expression ( expression-list_opt )
+postfix-expression := simple-type-specifier ( expression-list_opt )
+postfix-expression := typename-specifier ( expression-list_opt )
+postfix-expression := simple-type-specifier braced-init-list
+postfix-expression := postfix-expression . TEMPLATE_opt id-expression
+postfix-expression := postfix-expression -> TEMPLATE_opt id-expression
+postfix-expression := postfix-expression ++
+postfix-expression := postfix-expression --
+postfix-expression := DYNAMIC_CAST < type-id > ( expression )
+postfix-expression := STATIC_CAST < type-id > ( expression )
+postfix-expression := REINTERPRET_CAST < type-id > ( expression )
+postfix-expression := CONST_CAST < type-id > ( expression )
+postfix-expression := TYPEID ( expression )
+postfix-expression := TYPEID ( type-id )
+expression-list := initializer-list
+# expr.unary
+unary-expression := postfix-expression
+unary-expression := unary-operator cast-expression
+unary-expression := ++ cast-expression
+unary-expression := -- cast-expression
+unary-expression := await-expression
+unary-expression := SIZEOF unary-expression
+unary-expression := SIZEOF ( type-id )
+unary-expression := SIZEOF ... ( IDENTIFIER )
+unary-expression := ALIGNOF ( type-id )
+unary-expression := noexcept-expression
+unary-expression := new-expression
+unary-expression := delete-expression
+unary-operator := *
+unary-operator := &
+unary-operator := +
+unary-operator := -
+unary-operator := !
+unary-operator := ~
+await-expression := CO_AWAIT cast-expression
+noexcept-expression := NOEXCEPT ( expression )
+new-expression := ::_opt NEW new-placement_opt new-type-id new-initializer_opt
+new-expression := ::_opt NEW new-placement_opt ( type-id ) new-initializer_opt
+new-placement := ( expression-list )
+new-type-id := type-specifier-seq new-declarator_opt
+new-declarator := ptr-operator new-declarator_opt
+new-declarator := noptr-new-declarator
+noptr-new-declarator := [ expression_opt ]
+noptr-new-declarator := noptr-new-declarator [ constant-expression ]
+new-initializer := ( expression-list_opt )
+new-initializer := braced-init-list
+delete-expression := ::_opt DELETE cast-expression
+delete-expression := ::_opt DELETE [ ] cast-expression
+cast-expression := unary-expression
+cast-expression := ( type-id ) cast-expression
+# expr.mptr.oper
+pm-expression := cast-expression
+pm-expression := pm-expression .* cast-expression
+pm-expression := pm-expression ->* cast-expression
+# expr.mul
+multiplicative-expression := pm-expression
+multiplicative-expression := multiplicative-expression * pm-expression
+multiplicative-expression := multiplicative-expression / pm-expression
+multiplicative-expression := multiplicative-expression % pm-expression
+# expr.add
+additive-expression := multiplicative-expression
+additive-expression := additive-expression + multiplicative-expression
+additive-expression := additive-expression - multiplicative-expression
+# expr.shift
+shift-expression := additive-expression
+shift-expression := shift-expression << additive-expression
+shift-expression := shift-expression >> additive-expression
+# expr.spaceship
+compare-expression := shift-expression
+compare-expression := compare-expression <=> shift-expression
+# expr.rel
+relational-expression := compare-expression
+relational-expression := relational-expression < compare-expression
+relational-expression := relational-expression > compare-expression
+relational-expression := relational-expression <= compare-expression
+relational-expression := relational-expression >= compare-expression
+# expr.eq
+equality-expression := relational-expression
+equality-expression := equality-expression == relational-expression
+equality-expression := equality-expression != relational-expression
+# expr.bit.and
+and-expression := equality-expression
+and-expression := and-expression & equality-expression
+# expr.xor
+exclusive-or-expression := and-expression
+exclusive-or-expression := exclusive-or-expression ^ and-expression
+# expr.or
+inclusive-or-expression := exclusive-or-expression
+inclusive-or-expression := inclusive-or-expression | exclusive-or-expression
+# expr.log.and
+logical-and-expression := inclusive-or-expression
+logical-and-expression := logical-and-expression && inclusive-or-expression
+# expr.log.or
+logical-or-expression := logical-and-expression
+logical-or-expression := logical-or-expression || logical-and-expression
+# expr.cond
+conditional-expression := logical-or-expression
+conditional-expression := logical-or-expression ? expression : assignment-expression
+# expr.ass
+yield-expression := CO_YIELD assignment-expression
+yield-expression := CO_YIELD braced-init-list
+throw-expression := THROW assignment-expression_opt
+assignment-expression := conditional-expression
+assignment-expression := yield-expression
+assignment-expression := throw-expression
+assignment-expression := logical-or-expression assignment-operator initializer-clause
+assignment-operator := =
+assignment-operator := *=
+assignment-operator := /=
+assignment-operator := %=
+assignment-operator := +=
+assignment-operator := -=
+assignment-operator := >>=
+assignment-operator := <<=
+assignment-operator := &=
+assignment-operator := ^=
+assignment-operator := |=
+# expr.comma
+expression := assignment-expression
+expression := expression , assignment-expression
+# expr.const
+constant-expression := conditional-expression
+
+# gram.stmt
+statement := labeled-statement
+statement := expression-statement
+statement := compound-statement
+statement := selection-statement
+statement := iteration-statement
+statement := jump-statement
+statement := declaration-statement
+statement := try-block
+init-statement := expression-statement
+init-statement := simple-declaration
+condition := expression
+condition := decl-specifier-seq declarator brace-or-equal-initializer
+labeled-statement := IDENTIFIER : statement
+labeled-statement := CASE constant-expression : statement
+labeled-statement := DEFAULT : statement
+expression-statement := expression_opt ;
+compound-statement := { statement-seq_opt }
+statement-seq := statement
+statement-seq := statement-seq statement
+selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
+selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement
+selection-statement := SWITCH ( init-statement_opt condition ) statement
+iteration-statement := WHILE ( condition ) statement
+iteration-statement := DO statement WHILE ( expression ) ;
+iteration-statement := FOR ( init-statement condition_opt ; expression_opt ) statement
+iteration-statement := FOR ( init-statement_opt for-range-declaration : for-range-initializer ) statement
+for-range-declaration := decl-specifier-seq declarator
+for-range-declaration := decl-specifier-seq ref-qualifier_opt [ identifier-list ]
+for-range-initializer := expr-or-braced-init-list
+jump-statement := BREAK ;
+jump-statement := CONTINUE ;
+jump-statement := RETURN expr-or-braced-init-list_opt ;
+jump-statement := coroutine-return-statement
+jump-statement := GOTO IDENTIFIER ;
+coroutine-return-statement := CO_RETURN expr-or-braced-init-list_opt ;
+declaration-statement := block-declaration
+
+# gram.dcl
+declaration-seq := declaration
+declaration-seq := declaration-seq declaration
+declaration := block-declaration
+declaration := nodeclspec-function-declaration
+declaration := function-definition
+declaration := template-declaration
+declaration := deduction-guide
+declaration := explicit-instantiation
+declaration := explicit-specialization
+declaration := export-declaration
+declaration := linkage-specification
+declaration := namespace-definition
+declaration := empty-declaration
+declaration := module-import-declaration
+block-declaration := simple-declaration
+block-declaration := asm-declaration
+block-declaration := namespace-alias-definition
+block-declaration := using-declaration
+block-declaration := using-enum-declaration
+block-declaration := using-directive
+block-declaration := static_assert-declaration
+block-declaration := alias-declaration
+block-declaration := opaque-enum-declaration
+nodeclspec-function-declaration := declarator ;
+alias-declaration := USING IDENTIFIER = defining-type-id ;
+simple-declaration := decl-specifier-seq init-declarator-list_opt ;
+simple-declaration := decl-specifier-seq ref-qualifier_opt [ identifier-list ] initializer ;
+static_assert-declaration := STATIC_ASSERT ( constant-expression ) ;
+static_assert-declaration := STATIC_ASSERT ( constant-expression , string-literal ) ;
+empty-declaration := ;
+# dcl.spec
+decl-specifier := storage-class-specifier
+decl-specifier := defining-type-specifier
+decl-specifier := function-specifier
+decl-specifier := FRIEND
+decl-specifier := TYPEDEF
+decl-specifier := CONSTEXPR
+decl-specifier := CONSTEVAL
+decl-specifier := CONSTINIT
+decl-specifier := INLINE
+decl-specifier-seq := decl-specifier
+decl-specifier-seq := decl-specifier decl-specifier-seq
+storage-class-specifier := STATIC
+storage-class-specifier := THREAD_LOCAL
+storage-class-specifier := EXTERN
+storage-class-specifier := MUTABLE
+function-specifier := VIRTUAL
+function-specifier := explicit-specifier
+explicit-specifier := EXPLICIT ( constant-expression )
+explicit-specifier := EXPLICIT
+type-specifier := simple-type-specifier
+type-specifier := elaborated-type-specifier
+type-specifier := typename-specifier
+type-specifier := cv-qualifier
+type-specifier-seq := type-specifier
+type-specifier-seq := type-specifier type-specifier-seq
+defining-type-specifier := type-specifier
+defining-type-specifier := class-specifier
+defining-type-specifier := enum-specifier
+defining-type-specifier-seq := defining-type-specifier
+defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq
+simple-type-specifier := nested-name-specifier_opt type-name
+simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id
+simple-type-specifier := decltype-specifier
+simple-type-specifier := placeholder-type-specifier
+simple-type-specifier := nested-name-specifier_opt template-name
+simple-type-specifier := CHAR
+simple-type-specifier := CHAR8_T
+simple-type-specifier := CHAR16_T
+simple-type-specifier := CHAR32_T
+simple-type-specifier := WCHAR_T
+simple-type-specifier := BOOL
+simple-type-specifier := SHORT
+simple-type-specifier := INT
+simple-type-specifier := LONG
+simple-type-specifier := SIGNED
+simple-type-specifier := UNSIGNED
+simple-type-specifier := FLOAT
+simple-type-specifier := DOUBLE
+simple-type-specifier := VOID
+type-name := class-name
+type-name := enum-name
+type-name := typedef-name
+elaborated-type-specifier := class-key nested-name-specifier_opt IDENTIFIER
+elaborated-type-specifier := class-key simple-template-id
+elaborated-type-specifier := class-key nested-name-specifier TEMPLATE_opt simple-template-id
+elaborated-type-specifier := elaborated-enum-specifier
+elaborated-enum-specifier := ENUM nested-name-specifier_opt IDENTIFIER
+decltype-specifier := DECLTYPE ( expression )
+placeholder-type-specifier := type-constraint_opt AUTO
+placeholder-type-specifier := type-constraint_opt DECLTYPE ( AUTO )
+init-declarator-list := init-declarator
+init-declarator-list := init-declarator-list , init-declarator
+init-declarator := declarator initializer_opt
+init-declarator := declarator requires-clause
+declarator := ptr-declarator
+declarator := noptr-declarator parameters-and-qualifiers trailing-return-type
+ptr-declarator := noptr-declarator
+ptr-declarator := ptr-operator ptr-declarator
+noptr-declarator := declarator-id
+noptr-declarator := noptr-declarator parameters-and-qualifiers
+noptr-declarator := noptr-declarator [ constant-expression_opt ]
+noptr-declarator := ( ptr-declarator )
+parameters-and-qualifiers := ( parameter-declaration-clause_opt ) cv-qualifier-seq_opt ref-qualifier_opt noexcept-specifier_opt
+trailing-return-type := -> type-id
+ptr-operator := * cv-qualifier-seq_opt
+ptr-operator := &
+ptr-operator := &&
+ptr-operator := nested-name-specifier * cv-qualifier-seq_opt
+cv-qualifier-seq := cv-qualifier cv-qualifier-seq_opt
+cv-qualifier := CONST
+cv-qualifier := VOLATILE
+ref-qualifier := &
+ref-qualifier := &&
+declarator-id := ..._opt id-expression
+type-id := type-specifier-seq abstract-declarator_opt
+defining-type-id := defining-type-specifier-seq abstract-declarator_opt
+abstract-declarator := ptr-abstract-declarator
+abstract-declarator := noptr-abstract-declarator_opt parameters-and-qualifiers trailing-return-type
+abstract-declarator := abstract-pack-declarator
+ptr-abstract-declarator := noptr-abstract-declarator
+ptr-abstract-declarator := ptr-operator ptr-abstract-declarator_opt
+noptr-abstract-declarator := noptr-abstract-declarator_opt parameters-and-qualifiers
+noptr-abstract-declarator := noptr-abstract-declarator_opt [ constant-expression_opt ]
+noptr-abstract-declarator := ( ptr-abstract-declarator )
+abstract-pack-declarator := noptr-abstract-pack-declarator
+abstract-pack-declarator := ptr-operator abstract-pack-declarator
+noptr-abstract-pack-declarator := noptr-abstract-pack-declarator parameters-and-qualifiers
+noptr-abstract-pack-declarator := noptr-abstract-pack-declarator [ constant-expression_opt ]
+noptr-abstract-pack-declarator := ...
+#! Custom modifications to avoid nullable clause.
+parameter-declaration-clause := parameter-declaration-list
+parameter-declaration-clause := parameter-declaration-list_opt ...
+parameter-declaration-clause := parameter-declaration-list , ...
+parameter-declaration-list := parameter-declaration
+parameter-declaration-list := parameter-declaration-list , parameter-declaration
+parameter-declaration := decl-specifier-seq declarator
+parameter-declaration := decl-specifier-seq declarator = initializer-clause
+parameter-declaration := decl-specifier-seq abstract-declarator_opt
+parameter-declaration := decl-specifier-seq abstract-declarator_opt = initializer-clause
+# dcl.init
+initializer := brace-or-equal-initializer
+initializer := ( expression-list )
+brace-or-equal-initializer := = initializer-clause
+brace-or-equal-initializer := braced-init-list
+initializer-clause := assignment-expression
+initializer-clause := braced-init-list
+braced-init-list := { initializer-list ,_opt }
+braced-init-list := { designated-initializer-list ,_opt }
+braced-init-list := { }
+initializer-list := initializer-clause ..._opt
+initializer-list := initializer-list , initializer-clause ..._opt
+designated-initializer-list := designated-initializer-clause
+designated-initializer-list := designated-initializer-list , designated-initializer-clause
+designated-initializer-clause := designator brace-or-equal-initializer
+designator := . IDENTIFIER
+expr-or-braced-init-list := expression
+expr-or-braced-init-list := braced-init-list
+# dcl.fct
+function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body
+function-definition := decl-specifier-seq_opt declarator requires-clause function-body
+function-body := ctor-initializer_opt compound-statement
+function-body := function-try-block
+function-body := = DEFAULT ;
+function-body := = DELETE ;
+# dcl.enum
+enum-specifier := enum-head { enumerator-list_opt }
+enum-specifier := enum-head { enumerator-list , }
+enum-head := enum-key enum-head-name_opt enum-base_opt
+enum-head-name := nested-name-specifier_opt IDENTIFIER
+opaque-enum-declaration := enum-key enum-head-name enum-base_opt ;
+enum-key := ENUM
+enum-key := ENUM CLASS
+enum-key := ENUM STRUCT
+enum-base := : type-specifier-seq
+enumerator-list := enumerator-definition
+enumerator-list := enumerator-list , enumerator-definition
+enumerator-definition := enumerator
+enumerator-definition := enumerator = constant-expression
+enumerator := IDENTIFIER
+using-enum-declaration := USING elaborated-enum-specifier ;
+# basic.namespace
+namespace-definition := named-namespace-definition
+namespace-definition := unnamed-namespace-definition
+namespace-definition := nested-namespace-definition
+named-namespace-definition := INLINE_opt NAMESPACE IDENTIFIER { namespace-body_opt }
+unnamed-namespace-definition := INLINE_opt NAMESPACE { namespace-body_opt }
+nested-namespace-definition := NAMESPACE enclosing-namespace-specifier :: INLINE_opt IDENTIFIER { namespace-body_opt }
+enclosing-namespace-specifier := IDENTIFIER
+enclosing-namespace-specifier := enclosing-namespace-specifier :: INLINE_opt IDENTIFIER
+#! Custom modification to avoid nullable namespace-body.
+namespace-body := declaration-seq
+namespace-alias-definition := NAMESPACE IDENTIFIER = qualified-namespace-specifier ;
+qualified-namespace-specifier := nested-name-specifier_opt namespace-name
+using-directive := USING NAMESPACE nested-name-specifier_opt namespace-name ;
+using-declaration := USING using-declarator-list ;
+using-declarator-list := using-declarator ..._opt
+using-declarator-list := using-declarator-list , using-declarator ..._opt
+using-declarator := TYPENAME_opt nested-name-specifier unqualified-id
+# dcl.asm
+asm-declaration := ASM ( string-literal ) ;
+# dcl.link
+linkage-specification := EXTERN string-literal { declaration-seq_opt }
+linkage-specification := EXTERN string-literal declaration
+
+# gram.module
+module-declaration := export-keyword_opt module-keyword module-name module-partition_opt
+module-name := module-name-qualifier_opt IDENTIFIER
+module-partition := : module-name-qualifier_opt IDENTIFIER
+module-name-qualifier := IDENTIFIER .
+module-name-qualifier := module-name-qualifier IDENTIFIER .
+export-declaration := EXPORT declaration
+export-declaration := EXPORT { declaration-seq_opt }
+export-declaration := export-keyword module-import-declaration
+module-import-declaration := import-keyword module-name
+module-import-declaration := import-keyword module-partition
+# FIXME: we don't have header-name in the grammar. Handle these in PP?
+# module-import-declaration := import-keyword header-name
+global-module-fragment := module-keyword ; declaration-seq_opt
+private-module-fragment := module-keyword : PRIVATE ; declaration-seq_opt
+
+# gram.class
+class-specifier := class-head { member-specification_opt }
+class-head := class-key class-head-name class-virt-specifier_opt base-clause_opt
+class-head := class-key base-clause_opt
+class-head-name := nested-name-specifier_opt class-name
+class-virt-specifier := contextual-final
+class-key := CLASS
+class-key := STRUCT
+class-key := UNION
+member-specification := member-declaration member-specification_opt
+member-specification := access-specifier : member-declaration member-specification_opt
+member-declaration := decl-specifier-seq_opt member-declarator-list_opt ;
+member-declaration := function-definition
+member-declaration := using-declaration
+member-declaration := using-enum-declaration
+member-declaration := static_assert-declaration
+member-declaration := template-declaration
+member-declaration := explicit-specialization
+member-declaration := deduction-guide
+member-declaration := alias-declaration
+member-declaration := opaque-enum-declaration
+member-declaration := empty-declaration
+member-declarator-list := member-declarator
+member-declarator-list := member-declarator-list , member-declarator
+member-declarator := declarator virt-specifier-seq_opt pure-specifier_opt
+member-declarator := declarator requires-clause
+member-declarator := declarator brace-or-equal-initializer
+member-declarator := IDENTIFIER_opt : constant-expression brace-or-equal-initializer_opt
+virt-specifier-seq := virt-specifier
+virt-specifier-seq := virt-specifier-seq virt-specifier
+virt-specifier := contextual-override
+virt-specifier := contextual-final
+pure-specifier := = contextual-zero
+conversion-function-id := OPERATOR conversion-type-id
+conversion-type-id := type-specifier-seq conversion-declarator_opt
+conversion-declarator := ptr-operator conversion-declarator_opt
+base-clause := : base-specifier-list
+base-specifier-list := base-specifier ..._opt
+base-specifier-list := base-specifier-list , base-specifier ..._opt
+base-specifier := class-or-decltype
+base-specifier := VIRTUAL access-specifier_opt class-or-decltype
+base-specifier := access-specifier VIRTUAL_opt class-or-decltype
+class-or-decltype := nested-name-specifier_opt type-name
+class-or-decltype := nested-name-specifier TEMPLATE simple-template-id
+class-or-decltype := decltype-specifier
+access-specifier := PRIVATE
+access-specifier := PROTECTED
+access-specifier := PUBLIC
+ctor-initializer := : mem-initializer-list
+mem-initializer-list := mem-initializer ..._opt
+mem-initializer-list := mem-initializer-list , mem-initializer ..._opt
+mem-initializer := mem-initializer-id ( expression-list_opt )
+mem-initializer := mem-initializer-id braced-init-list
+mem-initializer-id := class-or-decltype
+mem-initializer-id := IDENTIFIER
+
+# gram.over
+operator-function-id := OPERATOR operator-name
+operator-name := NEW
+operator-name := DELETE
+operator-name := NEW [ ]
+operator-name := DELETE [ ]
+operator-name := CO_AWAIT
+operator-name := ( )
+operator-name := [ ]
+operator-name := ->
+operator-name := ->*
+operator-name := ~
+operator-name := !
+operator-name := +
+operator-name := -
+operator-name := *
+operator-name := /
+operator-name := %
+operator-name := ^
+operator-name := &
+operator-name := |
+operator-name := =
+operator-name := +=
+operator-name := -=
+operator-name := *=
+operator-name := /=
+operator-name := %=
+operator-name := ^=
+operator-name := &=
+operator-name := |=
+operator-name := ==
+operator-name := !=
+operator-name := <
+operator-name := >
+operator-name := <=
+operator-name := >=
+operator-name := <=>
+operator-name := &&
+operator-name := ||
+operator-name := <<
+operator-name := >>
+operator-name := <<=
+operator-name := >>=
+operator-name := ++
+operator-name := --
+operator-name := ,
+literal-operator-id := OPERATOR string-literal IDENTIFIER
+literal-operator-id := OPERATOR user-defined-string-literal
+
+# gram.temp
+template-declaration := template-head declaration
+template-declaration := template-head concept-definition
+template-head := TEMPLATE < template-parameter-list > requires-clause_opt
+template-parameter-list := template-parameter
+template-parameter-list := template-parameter-list , template-parameter
+requires-clause := REQUIRES constraint-logical-or-expression
+constraint-logical-or-expression := constraint-logical-and-expression
+constraint-logical-or-expression := constraint-logical-or-expression || constraint-logical-and-expression
+constraint-logical-and-expression := primary-expression
+constraint-logical-and-expression := constraint-logical-and-expression && primary-expression
+template-parameter := type-parameter
+template-parameter := parameter-declaration
+type-parameter := type-parameter-key ..._opt IDENTIFIER_opt
+type-parameter := type-parameter-key IDENTIFIER_opt = type-id
+type-parameter := type-constraint ..._opt IDENTIFIER_opt
+type-parameter := type-constraint IDENTIFIER_opt = type-id
+type-parameter := template-head type-parameter-key ..._opt IDENTIFIER_opt
+type-parameter := template-head type-parameter-key IDENTIFIER_opt = id-expression
+type-parameter-key := CLASS
+type-parameter-key := TYPENAME
+type-constraint := nested-name-specifier_opt concept-name
+type-constraint := nested-name-specifier_opt concept-name < template-argument-list_opt >
+simple-template-id := template-name < template-argument-list_opt >
+template-id := simple-template-id
+template-id := operator-function-id < template-argument-list_opt >
+template-id := literal-operator-id < template-argument-list_opt >
+template-argument-list := template-argument ..._opt
+template-argument-list := template-argument-list , template-argument ..._opt
+template-argument := constant-expression
+template-argument := type-id
+template-argument := id-expression
+constraint-expression := logical-or-expression
+deduction-guide := explicit-specifier_opt template-name ( parameter-declaration-list_opt ) -> simple-template-id ;
+concept-definition := CONCEPT concept-name = constraint-expression ;
+concept-name := IDENTIFIER
+typename-specifier := TYPENAME nested-name-specifier IDENTIFIER
+typename-specifier := TYPENAME nested-name-specifier TEMPLATE_opt simple-template-id
+explicit-instantiation := EXTERN_opt TEMPLATE declaration
+explicit-specialization := TEMPLATE < > declaration
+
+# gram.except
+try-block := TRY compound-statement handler-seq
+function-try-block := TRY ctor-initializer_opt compound-statement handler-seq
+handler-seq := handler handler-seq_opt
+handler := CATCH ( exception-declaration ) compound-statement
+exception-declaration := type-specifier-seq declarator
+exception-declaration := type-specifier-seq abstract-declarator_opt
+noexcept-specifier := NOEXCEPT ( constant-expression )
+noexcept-specifier := NOEXCEPT
+
+# gram.cpp
+identifier-list := IDENTIFIER
+identifier-list := identifier-list , IDENTIFIER
+
+# gram.lex
+#! As we use clang lexer, most of lexical symbols are not needed, we only add
+#! needed literals.
+literal := integer-literal
+literal := character-literal
+literal := floating-point-literal
+literal := string-literal
+literal := boolean-literal
+literal := pointer-literal
+literal := user-defined-literal
+integer-literal := NUMERIC_CONSTANT
+character-literal := CHAR_CONSTANT
+character-literal := WIDE_CHAR_CONSTANT
+character-literal := UTF8_CHAR_CONSTANT
+character-literal := UTF16_CHAR_CONSTANT
+character-literal := UTF32_CHAR_CONSTANT
+floating-point-literal := NUMERIC_CONSTANT
+string-literal-chunk := STRING_LITERAL
+string-literal-chunk := WIDE_STRING_LITERAL
+string-literal-chunk := UTF8_STRING_LITERAL
+string-literal-chunk := UTF16_STRING_LITERAL
+string-literal-chunk := UTF32_STRING_LITERAL
+#! Technically, string concatenation happens at phase 6 which is before parsing,
+#! so it doesn't belong to the grammar. However, we extend the grammar to
+#! support it, to make the pseudo parser fully functional on practical code.
+string-literal := string-literal-chunk
+string-literal := string-literal string-literal-chunk
+user-defined-literal := user-defined-integer-literal
+user-defined-literal := user-defined-floating-point-literal
+user-defined-literal := user-defined-string-literal
+user-defined-literal := user-defined-character-literal
+user-defined-integer-literal := NUMERIC_CONSTANT
+user-defined-string-literal-chunk := STRING_LITERAL
+user-defined-string-literal-chunk := WIDE_STRING_LITERAL
+user-defined-string-literal-chunk := UTF8_STRING_LITERAL
+user-defined-string-literal-chunk := UTF16_STRING_LITERAL
+user-defined-string-literal-chunk := UTF32_STRING_LITERAL
+user-defined-string-literal := user-defined-string-literal-chunk
+user-defined-string-literal := string-literal-chunk user-defined-string-literal
+user-defined-string-literal := user-defined-string-literal string-literal-chunk
+user-defined-floating-point-literal := NUMERIC_CONSTANT
+user-defined-character-literal := CHAR_CONSTANT
+user-defined-character-literal := WIDE_CHAR_CONSTANT
+user-defined-character-literal := UTF8_CHAR_CONSTANT
+user-defined-character-literal := UTF16_CHAR_CONSTANT
+user-defined-character-literal := UTF32_CHAR_CONSTANT
+boolean-literal := FALSE
+boolean-literal := TRUE
+pointer-literal := NULLPTR
+
+#! Contextual keywords -- clang lexer always lexes them as identifier tokens.
+#! Placeholders for literal text in the grammar that lex as other things.
+contextual-override := IDENTIFIER
+contextual-final := IDENTIFIER
+contextual-zero := NUMERIC_CONSTANT
+module-keyword := IDENTIFIER
+import-keyword := IDENTIFIER
+export-keyword := IDENTIFIER

diff  --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 07de12b7c190b..05e2d3901613a 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -76,6 +76,7 @@ list(APPEND CLANG_TEST_DEPS
   clang-repl
   clang-diff
   clang-scan-deps
+  clang-pseudo
   diagtool
   hmaptool
   )

diff  --git a/clang/test/Syntax/check-cxx-bnf.test b/clang/test/Syntax/check-cxx-bnf.test
new file mode 100644
index 0000000000000..fcc0fa6a1ecc7
--- /dev/null
+++ b/clang/test/Syntax/check-cxx-bnf.test
@@ -0,0 +1,2 @@
+// verify clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
+// RUN: clang-pseudo -check-grammar=%cxx-bnf-file

diff  --git a/clang/test/Syntax/lit.local.cfg b/clang/test/Syntax/lit.local.cfg
new file mode 100644
index 0000000000000..5a2e1cd0efd03
--- /dev/null
+++ b/clang/test/Syntax/lit.local.cfg
@@ -0,0 +1,4 @@
+cxx_bnf_file = os.path.join(config.clang_src_dir, 'lib', 'Tooling', 'Syntax',
+                            'Pseudo', 'cxx.bnf')
+config.substitutions.append(('%cxx-bnf-file',
+                             '%s' % (cxx_bnf_file)))

diff  --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index b071a776b32ae..933b428d8ca62 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -14,6 +14,7 @@ add_clang_subdirectory(clang-offload-bundler)
 add_clang_subdirectory(clang-offload-wrapper)
 add_clang_subdirectory(clang-scan-deps)
 add_clang_subdirectory(clang-repl)
+add_clang_subdirectory(clang-pseudo)
 
 add_clang_subdirectory(c-index-test)
 

diff  --git a/clang/tools/clang-pseudo/CMakeLists.txt b/clang/tools/clang-pseudo/CMakeLists.txt
new file mode 100644
index 0000000000000..c03c8c62d4cd5
--- /dev/null
+++ b/clang/tools/clang-pseudo/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS support)
+
+add_clang_tool(clang-pseudo
+  ClangPseudo.cpp
+  )
+
+set(CLANG_PSEUDO_LIB_DEPS
+  clangBasic
+  clangToolingSyntaxPseudo
+  )
+
+clang_target_link_libraries(clang-pseudo
+  PRIVATE
+  ${CLANG_PSEUDO_LIB_DEPS}
+  )

diff  --git a/clang/tools/clang-pseudo/ClangPseudo.cpp b/clang/tools/clang-pseudo/ClangPseudo.cpp
new file mode 100644
index 0000000000000..6fb8f58fa016c
--- /dev/null
+++ b/clang/tools/clang-pseudo/ClangPseudo.cpp
@@ -0,0 +1,47 @@
+//===-- ClangPseudo.cpp - Clang pseudo parser tool ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/Pseudo/Grammar.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using clang::syntax::pseudo::Grammar;
+using llvm::cl::desc;
+using llvm::cl::init;
+using llvm::cl::opt;
+
+static opt<std::string>
+    CheckGrammar("check-grammar", desc("Parse and check a BNF grammar file."),
+                 init(""));
+
+int main(int argc, char *argv[]) {
+  llvm::cl::ParseCommandLineOptions(argc, argv, "");
+
+  if (CheckGrammar.getNumOccurrences()) {
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+        llvm::MemoryBuffer::getFile(CheckGrammar);
+    if (std::error_code EC = Text.getError()) {
+      llvm::errs() << "Error: can't read grammar file '" << CheckGrammar
+                   << "': " << EC.message() << "\n";
+      return 1;
+    }
+    std::vector<std::string> Diags;
+    auto RSpecs = Grammar::parseBNF(Text.get()->getBuffer(), Diags);
+
+    if (!Diags.empty()) {
+      llvm::errs() << llvm::join(Diags, "\n");
+      return 2;
+    }
+    llvm::errs() << llvm::formatv("grammar file {0} is parsed successfully\n",
+                                  CheckGrammar);
+    return 0;
+  }
+  return 0;
+}


        


More information about the cfe-commits mailing list