[Mlir-commits] [mlir] [MLIR][Pygments] Refine the pygments MLIR lexer (PR #166406)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Nov 7 02:39:46 PST 2025
https://github.com/PragmaTwice updated https://github.com/llvm/llvm-project/pull/166406
>From 94821d0f1afc07d853abcdb211729661924b04b9 Mon Sep 17 00:00:00 2001
From: PragmaTwice <twice at apache.org>
Date: Wed, 5 Nov 2025 01:08:09 +0800
Subject: [PATCH 1/6] [MLIR][Pygments] Refine the pygments MLIR lexer
---
mlir/utils/pygments/mlir_lexer.py | 136 +++++++++++++++++++++++++-----
1 file changed, 114 insertions(+), 22 deletions(-)
diff --git a/mlir/utils/pygments/mlir_lexer.py b/mlir/utils/pygments/mlir_lexer.py
index 179a058e9110c..ebe29e083387c 100644
--- a/mlir/utils/pygments/mlir_lexer.py
+++ b/mlir/utils/pygments/mlir_lexer.py
@@ -2,37 +2,129 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-from pygments.lexer import RegexLexer
+from pygments.lexer import RegexLexer, bygroups, include, combined
from pygments.token import *
+import re
class MlirLexer(RegexLexer):
+ """Pygments lexer for MLIR.
+
+ This lexer focuses on accurate tokenization of common MLIR constructs:
+ - SSA values (%%... / %...)
+ - attribute and type aliases (#name =, !name =)
+ - types (builtin and dialect types, parametric types)
+ - attribute dictionaries and nested containers to a reasonable depth
+ - numbers (ints, floats with exponents, hex)
+ - strings with common escapes
+ - line comments (// ...)
+ - block labels (^foo) and operations
+ """
+
name = "MLIR"
aliases = ["mlir"]
filenames = ["*.mlir"]
+ flags = re.MULTILINE
+
tokens = {
"root": [
- (r"%[a-zA-Z0-9_]+", Name.Variable),
- (r"@[a-zA-Z_][a-zA-Z0-9_]+", Name.Function),
- (r"\^[a-zA-Z0-9_]+", Name.Label),
- (r"#[a-zA-Z0-9_]+", Name.Constant),
- (r"![a-zA-Z0-9_]+", Keyword.Type),
- (r"[a-zA-Z_][a-zA-Z0-9_]*\.", Name.Entity),
- (r"memref[^.]", Keyword.Type),
- (r"index", Keyword.Type),
- (r"i[0-9]+", Keyword.Type),
- (r"f[0-9]+", Keyword.Type),
+ # Comments
+ (r"//.*?$", Comment.Single),
+
+ # Attribute alias definition: #name =
+ (r"^\s*(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Name.Constant, Text, Operator)),
+
+ # Type alias definition: !name =
+ (r"^\s*(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Keyword.Type, Text, Operator)),
+
+ # SSA values (results, uses) - allow many characters MLIR uses
+ (r"%[%_A-Za-z0-9\.\$:\-]+", Name.Variable),
+
+ # attribute refs, constants and named attributes
+ (r"#[_A-Za-z0-9\$\-\.]+\b", Name.Constant),
+
+ # symbol refs / function-like names
+ (r"@[_A-Za-z][_A-Za-z0-9\$\-\.]*\b", Name.Function),
+
+ # blocks
+ (r"\^[A-Za-z0-9_\$\.\-]+", Name.Label),
+
+ # types by exclamation or builtin names
+ (r"![_A-Za-z0-9\$\-\.]+\b", Keyword.Type),
+ (r"\b(bf16|f16|f32|f64|f80|f128|index|none|(u|s)?i[0-9]+)\b", Keyword.Type),
+
+ # container-like dialect types (tensor<...>, memref<...>, vector<...>)
+ (r"\b(complex|memref|tensor|tuple|vector)\s*(<)", bygroups(Keyword.Type, Punctuation), 'angled-type'),
+
+ # affine constructs
+ (r"\b(affine_map|affine_set)\b", Keyword.Reserved),
+
+ # common builtin operators / functions inside affine_map
+ (r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Builtin),
+
+ # operation definitions with assignment: %... = op.name
+ (r"^\s*(%[\%_A-Za-z0-9\:\,\s]+)\s*(=)\s*([A-Za-z0-9_\.\$\-]+)\b",
+ bygroups(Name.Variable, Operator, Name.Function)),
+
+ # operation name without result
+ (r"^\s*([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", Name.Function),
+
+ # identifiers / bare words
+ (r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b", Name.Other),
+
+ # numbers: hex, float (with exponent), integer
+ (r"\b0x[0-9A-Fa-f]+\b", Number.Hex),
+ (r"\b([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?\b", Number.Float),
+ (r"\b[0-9]+\b", Number.Integer),
+
+ # strings
+ (r'"', String.Double, 'string'),
+
+ # punctuation and arrow-like tokens
+ (r"->|>=|<=|\>=|\<=|\->|\=>", Operator),
+ (r"[()\[\]{}<>,.:=]", Punctuation),
+
+ # operators
+ (r"[-+*/%]", Operator),
+ ],
+
+ # string state with common escapes
+ 'string': [
+ (r'\\[ntr"\\]', String.Escape),
+ (r'[^"\\]+', String.Double),
+ (r'"', String.Double, '#pop'),
+ ],
+
+ # angled-type content (simple nested handling)
+ 'angled-type': [
+ # match nested '<' and '>'
+ (r"<", Punctuation, '#push'),
+ (r">", Punctuation, '#pop'),
+
+ # dimensions like 3x or 3x3x... and standalone numbers:
+ # - match numbers that are followed by an 'x' (dimension separator)
+ (r"([0-9]+)(?=(?:[xX]))", Number.Integer),
+ # - match bare numbers (sizes)
(r"[0-9]+", Number.Integer),
- (r"[0-9]*\.[0-9]*", Number.Float),
- (r'"[^"]*"', String.Double),
- (r"affine_map", Keyword.Reserved),
- # TODO: this should be within affine maps only
- (r"\+-\*\/", Operator),
- (r"floordiv", Operator.Word),
- (r"ceildiv", Operator.Word),
- (r"mod", Operator.Word),
- (r"()\[\]<>,{}", Punctuation),
- (r"\/\/.*\n", Comment.Single),
- ]
+ # dynamic dimension '?'
+ (r"\?", Name.Constant),
+
+ # the 'x' dimension separator (treat as punctuation)
+ (r"[xX]", Punctuation),
+
+ # element / builtin types inside angle brackets (no word-boundary)
+ (r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))",
+ Keyword.Type),
+
+ # also allow nested container-like types to be recognized
+ (r"\b(complex|memref|tensor|tuple|vector)\s*(<)",
+ bygroups(Keyword.Type, Punctuation), 'angled-type'),
+
+ # fall back to root rules for anything else
+ include('root'),
+ ],
+
}
>From 72d7512400a63d3f680a87c1f2fc8792a7d1d8ba Mon Sep 17 00:00:00 2001
From: PragmaTwice <twice at apache.org>
Date: Wed, 5 Nov 2025 01:22:47 +0800
Subject: [PATCH 2/6] format
---
mlir/utils/pygments/mlir_lexer.py | 76 ++++++++++++++-----------------
1 file changed, 33 insertions(+), 43 deletions(-)
diff --git a/mlir/utils/pygments/mlir_lexer.py b/mlir/utils/pygments/mlir_lexer.py
index ebe29e083387c..ba0811ef2601b 100644
--- a/mlir/utils/pygments/mlir_lexer.py
+++ b/mlir/utils/pygments/mlir_lexer.py
@@ -31,79 +31,69 @@ class MlirLexer(RegexLexer):
"root": [
# Comments
(r"//.*?$", Comment.Single),
-
# Attribute alias definition: #name =
- (r"^\s*(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
- bygroups(Name.Constant, Text, Operator)),
-
+ (
+ r"^\s*(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Name.Constant, Text, Operator),
+ ),
# Type alias definition: !name =
- (r"^\s*(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
- bygroups(Keyword.Type, Text, Operator)),
-
+ (
+ r"^\s*(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Keyword.Type, Text, Operator),
+ ),
# SSA values (results, uses) - allow many characters MLIR uses
(r"%[%_A-Za-z0-9\.\$:\-]+", Name.Variable),
-
# attribute refs, constants and named attributes
(r"#[_A-Za-z0-9\$\-\.]+\b", Name.Constant),
-
# symbol refs / function-like names
(r"@[_A-Za-z][_A-Za-z0-9\$\-\.]*\b", Name.Function),
-
# blocks
(r"\^[A-Za-z0-9_\$\.\-]+", Name.Label),
-
# types by exclamation or builtin names
(r"![_A-Za-z0-9\$\-\.]+\b", Keyword.Type),
(r"\b(bf16|f16|f32|f64|f80|f128|index|none|(u|s)?i[0-9]+)\b", Keyword.Type),
-
# container-like dialect types (tensor<...>, memref<...>, vector<...>)
- (r"\b(complex|memref|tensor|tuple|vector)\s*(<)", bygroups(Keyword.Type, Punctuation), 'angled-type'),
-
+ (
+ r"\b(complex|memref|tensor|tuple|vector)\s*(<)",
+ bygroups(Keyword.Type, Punctuation),
+ "angled-type",
+ ),
# affine constructs
(r"\b(affine_map|affine_set)\b", Keyword.Reserved),
-
# common builtin operators / functions inside affine_map
(r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Builtin),
-
# operation definitions with assignment: %... = op.name
- (r"^\s*(%[\%_A-Za-z0-9\:\,\s]+)\s*(=)\s*([A-Za-z0-9_\.\$\-]+)\b",
- bygroups(Name.Variable, Operator, Name.Function)),
-
+ (
+ r"^\s*(%[\%_A-Za-z0-9\:\,\s]+)\s*(=)\s*([A-Za-z0-9_\.\$\-]+)\b",
+ bygroups(Name.Variable, Operator, Name.Function),
+ ),
# operation name without result
(r"^\s*([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", Name.Function),
-
# identifiers / bare words
(r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b", Name.Other),
-
# numbers: hex, float (with exponent), integer
(r"\b0x[0-9A-Fa-f]+\b", Number.Hex),
(r"\b([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][+-]?[0-9]+)?\b", Number.Float),
(r"\b[0-9]+\b", Number.Integer),
-
# strings
- (r'"', String.Double, 'string'),
-
+ (r'"', String.Double, "string"),
# punctuation and arrow-like tokens
(r"->|>=|<=|\>=|\<=|\->|\=>", Operator),
(r"[()\[\]{}<>,.:=]", Punctuation),
-
# operators
(r"[-+*/%]", Operator),
],
-
# string state with common escapes
- 'string': [
+ "string": [
(r'\\[ntr"\\]', String.Escape),
(r'[^"\\]+', String.Double),
- (r'"', String.Double, '#pop'),
+ (r'"', String.Double, "#pop"),
],
-
# angled-type content (simple nested handling)
- 'angled-type': [
+ "angled-type": [
# match nested '<' and '>'
- (r"<", Punctuation, '#push'),
- (r">", Punctuation, '#pop'),
-
+ (r"<", Punctuation, "#push"),
+ (r">", Punctuation, "#pop"),
# dimensions like 3x or 3x3x... and standalone numbers:
# - match numbers that are followed by an 'x' (dimension separator)
(r"([0-9]+)(?=(?:[xX]))", Number.Integer),
@@ -111,20 +101,20 @@ class MlirLexer(RegexLexer):
(r"[0-9]+", Number.Integer),
# dynamic dimension '?'
(r"\?", Name.Constant),
-
# the 'x' dimension separator (treat as punctuation)
(r"[xX]", Punctuation),
-
# element / builtin types inside angle brackets (no word-boundary)
- (r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))",
- Keyword.Type),
-
+ (
+ r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))",
+ Keyword.Type,
+ ),
# also allow nested container-like types to be recognized
- (r"\b(complex|memref|tensor|tuple|vector)\s*(<)",
- bygroups(Keyword.Type, Punctuation), 'angled-type'),
-
+ (
+ r"\b(complex|memref|tensor|tuple|vector)\s*(<)",
+ bygroups(Keyword.Type, Punctuation),
+ "angled-type",
+ ),
# fall back to root rules for anything else
- include('root'),
+ include("root"),
],
-
}
>From 11793af6742f0ba47becea38b8e5164f932d2a08 Mon Sep 17 00:00:00 2001
From: PragmaTwice <twice at apache.org>
Date: Wed, 5 Nov 2025 01:44:40 +0800
Subject: [PATCH 3/6] fix space
---
mlir/utils/pygments/mlir_lexer.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mlir/utils/pygments/mlir_lexer.py b/mlir/utils/pygments/mlir_lexer.py
index ba0811ef2601b..fed54fadbed4b 100644
--- a/mlir/utils/pygments/mlir_lexer.py
+++ b/mlir/utils/pygments/mlir_lexer.py
@@ -64,11 +64,11 @@ class MlirLexer(RegexLexer):
(r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Builtin),
# operation definitions with assignment: %... = op.name
(
- r"^\s*(%[\%_A-Za-z0-9\:\,\s]+)\s*(=)\s*([A-Za-z0-9_\.\$\-]+)\b",
- bygroups(Name.Variable, Operator, Name.Function),
+ r"^(\s*)(%[\%_A-Za-z0-9\:\,\s]+)(\s*=\s*)([A-Za-z0-9_\.\$\-]+)\b",
+ bygroups(Text, Name.Variable, Operator, Name.Function),
),
# operation name without result
- (r"^\s*([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", Name.Function),
+ (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", bygroups(Text, Name.Function)),
# identifiers / bare words
(r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b", Name.Other),
# numbers: hex, float (with exponent), integer
>From 49a5f16e78daa446d6aa90f066a97e8538e44375 Mon Sep 17 00:00:00 2001
From: PragmaTwice <twice at apache.org>
Date: Fri, 7 Nov 2025 12:29:35 +0800
Subject: [PATCH 4/6] fix issues in review
---
mlir/utils/pygments/mlir_lexer.bak.py | 38 ++++++++++++++++++++++
mlir/utils/pygments/mlir_lexer.py | 47 +++++++++++++++++----------
2 files changed, 67 insertions(+), 18 deletions(-)
create mode 100644 mlir/utils/pygments/mlir_lexer.bak.py
diff --git a/mlir/utils/pygments/mlir_lexer.bak.py b/mlir/utils/pygments/mlir_lexer.bak.py
new file mode 100644
index 0000000000000..179a058e9110c
--- /dev/null
+++ b/mlir/utils/pygments/mlir_lexer.bak.py
@@ -0,0 +1,38 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+from pygments.lexer import RegexLexer
+from pygments.token import *
+
+
+class MlirLexer(RegexLexer):
+ name = "MLIR"
+ aliases = ["mlir"]
+ filenames = ["*.mlir"]
+
+ tokens = {
+ "root": [
+ (r"%[a-zA-Z0-9_]+", Name.Variable),
+ (r"@[a-zA-Z_][a-zA-Z0-9_]+", Name.Function),
+ (r"\^[a-zA-Z0-9_]+", Name.Label),
+ (r"#[a-zA-Z0-9_]+", Name.Constant),
+ (r"![a-zA-Z0-9_]+", Keyword.Type),
+ (r"[a-zA-Z_][a-zA-Z0-9_]*\.", Name.Entity),
+ (r"memref[^.]", Keyword.Type),
+ (r"index", Keyword.Type),
+ (r"i[0-9]+", Keyword.Type),
+ (r"f[0-9]+", Keyword.Type),
+ (r"[0-9]+", Number.Integer),
+ (r"[0-9]*\.[0-9]*", Number.Float),
+ (r'"[^"]*"', String.Double),
+ (r"affine_map", Keyword.Reserved),
+ # TODO: this should be within affine maps only
+ (r"\+-\*\/", Operator),
+ (r"floordiv", Operator.Word),
+ (r"ceildiv", Operator.Word),
+ (r"mod", Operator.Word),
+ (r"()\[\]<>,{}", Punctuation),
+ (r"\/\/.*\n", Comment.Single),
+ ]
+ }
diff --git a/mlir/utils/pygments/mlir_lexer.py b/mlir/utils/pygments/mlir_lexer.py
index fed54fadbed4b..3e053a1dd2fa7 100644
--- a/mlir/utils/pygments/mlir_lexer.py
+++ b/mlir/utils/pygments/mlir_lexer.py
@@ -2,7 +2,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-from pygments.lexer import RegexLexer, bygroups, include, combined
+from pygments.lexer import RegexLexer, bygroups, include, using
from pygments.token import *
import re
@@ -27,22 +27,40 @@ class MlirLexer(RegexLexer):
flags = re.MULTILINE
+ class VariableList(RegexLexer):
+ """Lexer for lists of SSA variables separated by commas."""
+
+ tokens = {
+ "root": [
+ (r"\s+", Text),
+ (r",", Punctuation),
+ (r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable),
+ ]
+ }
+
tokens = {
"root": [
# Comments
(r"//.*?$", Comment.Single),
+ # operation name with assignment: %... = op.name
+ (
+ r"^(\s*)(%[\%_A-Za-z0-9\:#\,\s]+)(=)(\s*)([A-Za-z0-9_\.\$\-]+)\b",
+ bygroups(Text, using(VariableList), Operator, Text, Name.Builtin),
+ ),
+ # operation name without result
+ (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", bygroups(Text, Name.Builtin)),
# Attribute alias definition: #name =
(
- r"^\s*(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
- bygroups(Name.Constant, Text, Operator),
+ r"^(\s*)(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Text, Name.Constant, Text, Operator),
),
# Type alias definition: !name =
(
- r"^\s*(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
- bygroups(Keyword.Type, Text, Operator),
+ r"^(\s*)(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
+ bygroups(Text, Keyword.Type, Text, Operator),
),
- # SSA values (results, uses) - allow many characters MLIR uses
- (r"%[%_A-Za-z0-9\.\$:\-]+", Name.Variable),
+ # SSA values (results, uses)
+ (r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable),
# attribute refs, constants and named attributes
(r"#[_A-Za-z0-9\$\-\.]+\b", Name.Constant),
# symbol refs / function-like names
@@ -61,14 +79,7 @@ class MlirLexer(RegexLexer):
# affine constructs
(r"\b(affine_map|affine_set)\b", Keyword.Reserved),
# common builtin operators / functions inside affine_map
- (r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Builtin),
- # operation definitions with assignment: %... = op.name
- (
- r"^(\s*)(%[\%_A-Za-z0-9\:\,\s]+)(\s*=\s*)([A-Za-z0-9_\.\$\-]+)\b",
- bygroups(Text, Name.Variable, Operator, Name.Function),
- ),
- # operation name without result
- (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])", bygroups(Text, Name.Function)),
+ (r"\b(ceildiv|floordiv|mod|symbol)\b", Name.Other),
# identifiers / bare words
(r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b", Name.Other),
# numbers: hex, float (with exponent), integer
@@ -96,13 +107,13 @@ class MlirLexer(RegexLexer):
(r">", Punctuation, "#pop"),
# dimensions like 3x or 3x3x... and standalone numbers:
# - match numbers that are followed by an 'x' (dimension separator)
- (r"([0-9]+)(?=(?:[xX]))", Number.Integer),
+ (r"([0-9]+)(?=(?:x))", Number.Integer),
# - match bare numbers (sizes)
(r"[0-9]+", Number.Integer),
# dynamic dimension '?'
- (r"\?", Name.Constant),
+ (r"\?", Name.Integer),
# the 'x' dimension separator (treat as punctuation)
- (r"[xX]", Punctuation),
+ (r"x", Punctuation),
# element / builtin types inside angle brackets (no word-boundary)
(
r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))",
>From 0534a170c00bcdcbd78055ba4e3e7d59d088b748 Mon Sep 17 00:00:00 2001
From: PragmaTwice <twice at apache.org>
Date: Fri, 7 Nov 2025 12:37:23 +0800
Subject: [PATCH 5/6] remove useless file
---
mlir/utils/pygments/mlir_lexer.bak.py | 38 ---------------------------
1 file changed, 38 deletions(-)
delete mode 100644 mlir/utils/pygments/mlir_lexer.bak.py
diff --git a/mlir/utils/pygments/mlir_lexer.bak.py b/mlir/utils/pygments/mlir_lexer.bak.py
deleted file mode 100644
index 179a058e9110c..0000000000000
--- a/mlir/utils/pygments/mlir_lexer.bak.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-from pygments.lexer import RegexLexer
-from pygments.token import *
-
-
-class MlirLexer(RegexLexer):
- name = "MLIR"
- aliases = ["mlir"]
- filenames = ["*.mlir"]
-
- tokens = {
- "root": [
- (r"%[a-zA-Z0-9_]+", Name.Variable),
- (r"@[a-zA-Z_][a-zA-Z0-9_]+", Name.Function),
- (r"\^[a-zA-Z0-9_]+", Name.Label),
- (r"#[a-zA-Z0-9_]+", Name.Constant),
- (r"![a-zA-Z0-9_]+", Keyword.Type),
- (r"[a-zA-Z_][a-zA-Z0-9_]*\.", Name.Entity),
- (r"memref[^.]", Keyword.Type),
- (r"index", Keyword.Type),
- (r"i[0-9]+", Keyword.Type),
- (r"f[0-9]+", Keyword.Type),
- (r"[0-9]+", Number.Integer),
- (r"[0-9]*\.[0-9]*", Number.Float),
- (r'"[^"]*"', String.Double),
- (r"affine_map", Keyword.Reserved),
- # TODO: this should be within affine maps only
- (r"\+-\*\/", Operator),
- (r"floordiv", Operator.Word),
- (r"ceildiv", Operator.Word),
- (r"mod", Operator.Word),
- (r"()\[\]<>,{}", Punctuation),
- (r"\/\/.*\n", Comment.Single),
- ]
- }
>From 47fac063762356ef78bce651a80a41af21a032a7 Mon Sep 17 00:00:00 2001
From: PragmaTwice <twice at apache.org>
Date: Fri, 7 Nov 2025 18:39:30 +0800
Subject: [PATCH 6/6] add change-sync notice
---
mlir/utils/pygments/mlir_lexer.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mlir/utils/pygments/mlir_lexer.py b/mlir/utils/pygments/mlir_lexer.py
index 3e053a1dd2fa7..4cbe0fe236fc4 100644
--- a/mlir/utils/pygments/mlir_lexer.py
+++ b/mlir/utils/pygments/mlir_lexer.py
@@ -59,7 +59,7 @@ class VariableList(RegexLexer):
r"^(\s*)(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)",
bygroups(Text, Keyword.Type, Text, Operator),
),
- # SSA values (results, uses)
+ # SSA values (uses)
(r"%[_A-Za-z0-9\.\$\-:#]+", Name.Variable),
# attribute refs, constants and named attributes
(r"#[_A-Za-z0-9\$\-\.]+\b", Name.Constant),
@@ -69,6 +69,7 @@ class VariableList(RegexLexer):
(r"\^[A-Za-z0-9_\$\.\-]+", Name.Label),
# types by exclamation or builtin names
(r"![_A-Za-z0-9\$\-\.]+\b", Keyword.Type),
+ # NOTE: please sync changes to corresponding builtin type rule in "angled-type"
(r"\b(bf16|f16|f32|f64|f80|f128|index|none|(u|s)?i[0-9]+)\b", Keyword.Type),
# container-like dialect types (tensor<...>, memref<...>, vector<...>)
(
@@ -100,7 +101,7 @@ class VariableList(RegexLexer):
(r'[^"\\]+', String.Double),
(r'"', String.Double, "#pop"),
],
- # angled-type content (simple nested handling)
+ # angled-type content
"angled-type": [
# match nested '<' and '>'
(r"<", Punctuation, "#push"),
@@ -115,6 +116,7 @@ class VariableList(RegexLexer):
# the 'x' dimension separator (treat as punctuation)
(r"x", Punctuation),
# element / builtin types inside angle brackets (no word-boundary)
+ # NOTE: please sync changes to corresponding builtin type rule in "root"
(
r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))",
Keyword.Type,
More information about the Mlir-commits mailing list