[Mlir-commits] [mlir] 9a4472c - [mlir] Add basic tree-sitter grammar file

Jacques Pienaar llvmlistbot at llvm.org
Thu Apr 28 11:42:52 PDT 2022


Author: Jacques Pienaar
Date: 2022-04-28T11:42:46-07:00
New Revision: 9a4472c56cbb4ea15be3e9a537e4ebdfc200a52c

URL: https://github.com/llvm/llvm-project/commit/9a4472c56cbb4ea15be3e9a537e4ebdfc200a52c
DIFF: https://github.com/llvm/llvm-project/commit/9a4472c56cbb4ea15be3e9a537e4ebdfc200a52c.diff

LOG: [mlir] Add basic tree-sitter grammar file

tree-sitter grammar file that tries to closely match LangRef (it could use
some tweaking and cleanup, but is kept fairly basic). Also updated LangRef in
places where I found some issues while doing the nearly direct transcription.

This only adds a grammar file, not all the other parts (npm etc.) that
accompany it. Those I'll propose for a separate repo, like we do for the vscode
extension.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D124352

Added: 
    mlir/utils/tree-sitter-mlir/README.md
    mlir/utils/tree-sitter-mlir/grammar.js

Modified: 
    mlir/docs/LangRef.md

Removed: 
    


################################################################################
diff  --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md
index 597612126af4b..ab2e281700a90 100644
--- a/mlir/docs/LangRef.md
+++ b/mlir/docs/LangRef.md
@@ -201,6 +201,7 @@ Syntax:
 bare-id ::= (letter|[_]) (letter|digit|[_$.])*
 bare-id-list ::= bare-id (`,` bare-id)*
 value-id ::= `%` suffix-id
+alias-name ::= bare-id
 suffix-id ::= (digit+ | ((letter|id-punct) (letter|id-punct|digit)*))
 
 symbol-ref-id ::= `@` (suffix-id | string-literal) (`::` symbol-ref-id)?
@@ -295,7 +296,7 @@ custom-operation     ::= bare-id custom-operation-format
 op-result-list       ::= op-result (`,` op-result)* `=`
 op-result            ::= value-id (`:` integer-literal)
 successor-list       ::= `[` successor (`,` successor)* `]`
-successor            ::= caret-id (`:` bb-arg-list)?
+successor            ::= caret-id (`:` block-arg-list)?
 region-list          ::= `(` region (`,` region)* `)`
 dictionary-attribute ::= `{` (attribute-entry (`,` attribute-entry)*)? `}`
 trailing-location    ::= (`loc` `(` location `)`)?
@@ -645,9 +646,12 @@ type-list-parens ::= `(` `)`
 
 // This is a common way to refer to a value with a specified type.
 ssa-use-and-type ::= ssa-use `:` type
+ssa-use ::= value-use
 
 // Non-empty list of names and types.
 ssa-use-and-type-list ::= ssa-use-and-type (`,` ssa-use-and-type)*
+
+function-type ::= (type | type-list-parens) `->` (type | type-list-parens)
 ```
 
 ### Type Aliases
@@ -693,10 +697,9 @@ pretty-dialect-item-contents ::= pretty-dialect-item-body
                               | '(' pretty-dialect-item-contents+ ')'
                               | '[' pretty-dialect-item-contents+ ']'
                               | '{' pretty-dialect-item-contents+ '}'
-                              | '[^[<({>\])}\0]+'
+                              | '[^\[<({\]>)}\0]+'
 
-dialect-type ::= '!' opaque-dialect-item
-dialect-type ::= '!' pretty-dialect-item
+dialect-type ::= '!' (opaque-dialect-item | pretty-dialect-item)
 ```
 
 Dialect types can be specified in a verbose form, e.g. like this:

diff  --git a/mlir/utils/tree-sitter-mlir/README.md b/mlir/utils/tree-sitter-mlir/README.md
new file mode 100644
index 0000000000000..bde8c36f9a57b
--- /dev/null
+++ b/mlir/utils/tree-sitter-mlir/README.md
@@ -0,0 +1,8 @@
+tree-sitter-mlir
+================
+
+Basic [tree-sitter](https://github.com/tree-sitter/tree-sitter) grammar for
+MLIR following the [lang-ref](https://mlir.llvm.org/docs/LangRef/).
+
+Note: the directory in [LLVM repo](https://github.com/llvm/llvm-project/)
+merely contains the grammar file(s) and not the NPM/generated code.

diff  --git a/mlir/utils/tree-sitter-mlir/grammar.js b/mlir/utils/tree-sitter-mlir/grammar.js
new file mode 100644
index 0000000000000..254af1d1caf3a
--- /dev/null
+++ b/mlir/utils/tree-sitter-mlir/grammar.js
@@ -0,0 +1,261 @@
+module.exports = grammar({
+  name : 'mlir',
+  extras : $ => [/\s/,
+                 $.comment,
+],
+  conflicts : $ => [],
+  rules : {
+    // Top level production:
+    //   (operation | attribute-alias-def | type-alias-def)
+    toplevel : $ => seq(choice(
+                 $.operation,
+                 $.attribute_alias_def,
+                 $.type_alias_def,
+                 )),
+
+    // Common syntax (lang-ref)
+    //  digit     ::= [0-9]
+    //  hex_digit ::= [0-9a-fA-F]
+    //  letter    ::= [a-zA-Z]
+    //  id-punct  ::= [$._-]
+    //
+    //  integer-literal ::= decimal-literal | hexadecimal-literal
+    //  decimal-literal ::= digit+
+    //  hexadecimal-literal ::= `0x` hex_digit+
+    //  float-literal ::= [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
+    //  string-literal  ::= `"` [^"\n\f\v\r]* `"`   TODO: define escaping rules
+    //
+    _digit : $ => /[0-9]/,
+    _hex_digit : $ => /[0-9a-fA-F]/,
+    integer_literal : $ => choice($._decimal_literal, $._hexadecimal_literal),
+    _decimal_literal : $ => repeat1($._digit),
+    _hexadecimal_literal : $ => seq('0x', repeat1($._hex_digit)),
+    float_literal : $ => token(
+                      seq(optional(/[-+]/), repeat1(/[0_9]/),
+                          optional(seq('.', repeat(/[0-9]/),
+                                       optional(seq(/[eE]/, optional(/[-+]/),
+                                                    repeat1(/[0-9]/))))))),
+    string_literal : $ => seq(
+                       '"',
+                       repeat(token.immediate(prec(1, /[^\\"\n\f\v\r]+/))),
+                       '"',
+                       ),
+
+    // Identifiers
+    //   bare-id ::= (letter|[_]) (letter|digit|[_$.])*
+    //   bare-id-list ::= bare-id (`,` bare-id)*
+    //   value-id ::= `%` suffix-id
+    //   suffix-id ::= (digit+ | ((letter|id-punct) (letter|id-punct|digit)*))
+    //   alias-name :: = bare-id
+    //
+    //   symbol-ref-id ::= `@` (suffix-id | string-literal) (`::`
+    //   symbol-ref-id)?
+    //   value-id-list ::= value-id (`,` value-id)*
+    //
+    //   // Uses of value, e.g. in an operand list to an operation.
+    //   value-use ::= value-id
+    //   value-use-list ::= value-use (`,` value-use)*
+    bare_id : $ => seq(token(/[a-zA-Z_]/),
+                       token.immediate(repeat(/[a-zA-Z0-9_$]/))),
+    bare_id_list : $ => seq($.bare_id, repeat(seq(',', $.bare_id))),
+    value_id : $ => seq('%', $._suffix_id),
+    alias_name : $ => $.bare_id,
+    _suffix_id : $ => choice(repeat1(/[0-9]/),
+                             seq(/[a-zA-Z_$.]/, repeat(/[a-zA-Z0-9_$.]/))),
+    symbol_ref_id : $ => seq('@', choice($._suffix_id, $.string_literal),
+                             optional(seq('::', $.symbol_ref_id))),
+    value_use : $ => $.value_id,
+    value_use_list : $ => seq($.value_use, repeat(seq(',', $.value_use))),
+
+    // Operations
+    //   operation            ::= op-result-list? (generic-operation |
+    //                            custom-operation)
+    //                            trailing-location?
+    //   generic-operation    ::= string-literal `(` value-use-list? `)`
+    //   successor-list?
+    //                            region-list? dictionary-attribute? `:`
+    //                            function-type
+    //   custom-operation     ::= bare-id custom-operation-format
+    //   op-result-list       ::= op-result (`,` op-result)* `=`
+    //   op-result            ::= value-id (`:` integer-literal)
+    //   successor-list       ::= `[` successor (`,` successor)* `]`
+    //   successor            ::= caret-id (`:` bb-arg-list)?
+    //   region-list          ::= `(` region (`,` region)* `)`
+    //   dictionary-attribute ::= `{` (attribute-entry (`,` attribute-entry)*)?
+    //                            `}`
+    //   trailing-location    ::= (`loc` `(` location `)`)?
+    operation : $ => seq(optional($.op_result_list),
+                         choice($.generic_operation, $.custom_operation),
+                         optional($.trailing_location)),
+    generic_operation : $ =>
+                          seq($.string_literal, '(', optional($.value_use_list),
+                              ')', optional($.successor_list),
+                              optional($.region_list),
+                              optional($.dictionary_attribute), ':',
+                              $.function_type),
+    // custom-operation rule is defined later in the grammar, post the generic.
+    op_result_list : $ => seq($.op_result, repeat(seq(',', $.op_result)), '='),
+    op_result : $ => seq($.value_id, optional(seq(':', $.integer_literal))),
+    successor_list : $ => seq('[', $.successor, repeat(seq(',', $.successor)),
+                              ']'),
+    successor : $ => seq($.caret_id, optional(seq(':', $.block_arg_list))),
+    region_list : $ => seq('(', $.region, repeat(seq(',', $.region)), ')'),
+    dictionary_attribute : $ => seq(
+                             '{',
+                             optional(seq($.attribute_entry,
+                                          repeat(seq(',', $.attribute_entry)))),
+                             '}'),
+    trailing_location : $ => seq('loc(', $.location, ')'),
+    // TODO: Complete location forms.
+    location : $ => $.string_literal,
+
+    // Blocks
+    //   block           ::= block-label operation+
+    //   block-label     ::= block-id block-arg-list? `:`
+    //   block-id        ::= caret-id
+    //   caret-id        ::= `^` suffix-id
+    //   value-id-and-type ::= value-id `:` type
+    //
+    //   // Non-empty list of names and types.
+    //   value-id-and-type-list ::= value-id-and-type (`,` value-id-and-type)*
+    //
+    //   block-arg-list ::= `(` value-id-and-type-list? `)`
+    block : $ => seq($.block_label, repeat1($.operation)),
+    block_label : $ => seq($._block_id, optional($.block_arg_list), ':'),
+    _block_id : $ => $.caret_id,
+    caret_id : $ => seq('^', $._suffix_id),
+    value_id_and_type : $ => seq($.value_id, ':', $.type),
+    value_id_and_type_list : $ => seq($.value_id_and_type,
+                                      repeat(seq(',', $.value_id_and_type))),
+    block_arg_list : $ => seq('(', optional($.value_id_and_type_list), ')'),
+
+    // Regions
+    //   region      ::= `{` entry-block? block* `}`
+    //   entry-block ::= operation+
+    region : $ => seq('{', optional($.entry_block), repeat($.block), '}'),
+    entry_block : $ => repeat1($.operation),
+
+    // Types
+    //   type ::= type-alias | dialect-type | builtin-type
+    //
+    //   type-list-no-parens ::=  type (`,` type)*
+    //   type-list-parens ::= `(` type-list-no-parens? `)`
+    //
+    //   // This is a common way to refer to a value with a specified type.
+    //   ssa-use-and-type ::= ssa-use `:` type
+    //   ssa-use ::= value-use
+    //
+    //   // Non-empty list of names and types.
+    //   ssa-use-and-type-list ::= ssa-use-and-type (`,` ssa-use-and-type)*
+    //
+    //   function-type ::= (type | type-list-parens) `->` (type |
+    //   type-list-parens)
+    type : $ => choice($.type_alias, $.dialect_type, $.builtin_type),
+    type_list_no_parens : $ => seq($.type, repeat(seq(',', $.type))),
+    type_list_parens : $ => seq('(', optional($.type_list_no_parens), ')'),
+    ssa_use_and_type : $ => seq($.ssa_use, ':', $.type),
+    ssa_use : $ => $.value_use,
+    ssa_use_and_type_list : $ => seq($.ssa_use_and_type,
+                                     repeat(seq(',', $.ssa_use_and_type))),
+    function_type : $ => seq(choice($.type, $.type_list_parens), '->',
+                             choice($.type, $.type_list_parens)),
+
+    // Type aliases
+    //   type-alias-def ::= '!' alias-name '=' 'type' type
+    //   type-alias ::= '!' alias-name
+    type_alias_def : $ => seq('!', $.alias_name, '=', 'type', $.type),
+    type_alias : $ => seq('!', $.alias_name),
+
+    // Dialect Types
+    //   dialect-namespace ::= bare-id
+    //
+    //   opaque-dialect-item ::= dialect-namespace '<' string-literal '>'
+    //
+    //   pretty-dialect-item ::= dialect-namespace '.'
+    //   pretty-dialect-item-lead-ident
+    //                                                 pretty-dialect-item-body?
+    //
+    //   pretty-dialect-item-lead-ident ::= '[A-Za-z][A-Za-z0-9._]*'
+    //   pretty-dialect-item-body ::= '<' pretty-dialect-item-contents+ '>'
+    //   pretty-dialect-item-contents ::= pretty-dialect-item-body
+    //                                 | '(' pretty-dialect-item-contents+ ')'
+    //                                 | '[' pretty-dialect-item-contents+ ']'
+    //                                 | '{' pretty-dialect-item-contents+ '}'
+    //                                 | '[^[<({>\])}\0]+'
+    //
+    //   dialect-type ::= '!' (opaque-dialect-item | pretty-dialect-item)
+    dialect_type : $ => seq(
+                     '!', choice($.opaque_dialect_item, $.pretty_dialect_item)),
+    dialect_namespace : $ => $.bare_id,
+    opaque_dialect_item : $ => seq($.dialect_namespace, '<', $.string_literal,
+                                   '>'),
+    pretty_dialect_item : $ => seq($.dialect_namespace, '.',
+                                   $.pretty_dialect_item_lead_ident,
+                                   optional($.pretty_dialect_item_body)),
+    pretty_dialect_item_lead_ident : $ => $.bare_id,
+    pretty_dialect_item_body : $ => seq('<',
+                                        repeat1($.pretty_dialect_item_contents),
+                                        '>'),
+    // TODO: not sure why prec.left (setting left-associated parsing) needed
+    // here,
+    // left-associated way avoids an ambiguity flagged by generator. It may not
+    // be needed and be only papering over an issue.
+    pretty_dialect_item_contents : $ => prec.left(choice(
+                                     $.pretty_dialect_item_body,
+                                     seq('(',
+                                         repeat1(
+                                             $.pretty_dialect_item_contents),
+                                         ')'),
+                                     seq('[',
+                                         repeat1(
+                                             $.pretty_dialect_item_contents),
+                                         ']'),
+                                     seq('{',
+                                         repeat1(
+                                             $.pretty_dialect_item_contents),
+                                         '}'),
+                                     repeat1(/[^\[<({>\])}\\0]/))),
+    dialect_type : $ => seq(
+                     '!', choice($.opaque_dialect_item, $.pretty_dialect_item)),
+
+    // Builtin types
+    builtin_type : $ => choice(
+                     // TODO: Add builtin types
+                     seq('i', repeat1(/[0-9]/))),
+
+    // Attributes
+    //   attribute-entry ::= (bare-id | string-literal) `=` attribute-value
+    //   attribute-value ::= attribute-alias | dialect-attribute |
+    //   builtin-attribute
+    attribute_entry : $ => seq(choice($.bare_id, $.string_literal), '=',
+                               $.attribute_value),
+    attribute_value : $ => choice($.attribute_alias, $.dialect_attribute,
+                                  $.builtin_attribute),
+
+    // Attribute Value Aliases
+    //   attribute-alias-def ::= '#' alias-name '=' attribute-value
+    //   attribute-alias ::= '#' alias-name
+    attribute_alias_def : $ => seq('#', $.alias_name, '=', $.attribute_value),
+    attribute_alias : $ => seq('#', $.alias_name),
+
+    // Dialect Attribute Values
+    dialect_attribute : $ => seq('#', choice($.opaque_dialect_item,
+                                             $.pretty_dialect_item)),
+
+    // Builtin Attribute Values
+    builtin_attribute : $ => choice(
+                          // TODO
+                          $.function_type,
+                          $.string_literal,
+                          ),
+
+    // Comment (standard BCPL)
+    comment : $ => token(seq('//', /.*/)),
+
+    custom_operation : $ => choice(
+                         // TODO: Just basic/incomplete instance.
+                         seq('func', field('name', $.symbol_ref_id),
+                             $.block_arg_list, '->', $.type, $.region),
+                         ),
+  }
+});


        


More information about the Mlir-commits mailing list