[Mlir-commits] [mlir] [mlir] Use MLIR op names when generating FileCheck variables in generate-test-checks.py (PR #160820)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Sep 26 00:24:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Andrzej Warzyński (banach-space)
<details>
<summary>Changes</summary>
Motivation
----------
Improve readability and stability of autogenerated CHECK lines by using
operation-aware FileCheck variable names instead of generic VAL_N.
What changes
------------
- When possible, variable names are derived from the MLIR op name, e.g.
`vector.transfer_read` → `TRANSFER_READ_0`.
- Unknown ops (e.g., from out-of-tree dialects) fall back to the prior
`VAL_N` scheme.
Before
------
```mlir
// CHECK: %[[VAL_4:.*]] = vector.transfer_read ...
// CHECK: %[[VAL_5:.*]] = "val_use"(%[[VAL_4]]) : ...
```
After
-----
```mlir
// CHECK: %[[TRANSFER_READ_0:.*]] = vector.transfer_read ...
// CHECK: %[[VAL_1:.*]] = "val_use"(%[[TRANSFER_READ_0]]) : ...
```
Rationale
---------
Using op-derived names (e.g., `TRANSFER_READ_0`) makes tests easier to
read and audit. The names are also more stable across unrelated edits:
every op type has its own counter, so there will always be fewer
`TRANSFER_READ_#N` variables than `VAL_#N` variables to renumber.
The fallback to `VAL_N` preserves compatibility for unknown ops.
---
Full diff: https://github.com/llvm/llvm-project/pull/160820.diff
1 File Affected:
- (modified) mlir/utils/generate-test-checks.py (+30-4)
``````````diff
diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py
index f80a1811f418c..02ed4a24e122c 100755
--- a/mlir/utils/generate-test-checks.py
+++ b/mlir/utils/generate-test-checks.py
@@ -31,6 +31,7 @@
import os # Used to advertise this file's name ("autogenerated_note").
import re
import sys
+from collections import Counter
ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by "
ADVERT_END = """
@@ -45,6 +46,14 @@
SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*"
SSA_RE = re.compile(SSA_RE_STR)
+# Regex matching `dialect.op_name`, where `dialect` is an upstream MLIR
+# dialect (e.g. `vector.transfer_read`).
+DIALECTS = "acc|affine|amdgpu|amx|arith|arm_neon|arm_sve|arm_sme|async|bufferization|cf|complex|dlti|emitc|\
+ func|gpu|index|irdl|linalg|llvm|math|memref|ml_program|mpi|nvgpu|nvvm|omp|pdl_interp|pdl|ptr|quant|\
+ rocdl|scf|shape|shard|smt|sparse_tensor|tensor|ub|vcix|vector|wasmssa|x86vector|xegpu|xevm|spirv|tosa|\
+ transform"
+SSA_OP_NAME_RE = re.compile(rf'\b(?:{DIALECTS})[.]([a-z_]+)\b')
+
# Regex matching the left-hand side of an assignment
SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*='
SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR)
@@ -63,7 +72,12 @@
class VariableNamer:
def __init__(self, variable_names):
self.scopes = []
+ # Counter for generic FileCHeck names, e.g. VAL_#N
self.name_counter = 0
+ # Counters for FileCheck names derived from Op names, e.g.
+ # TRANSFER_READ_#N (based on `vector.transfer_read`). Note, there's a
+ # dedicated counter for every Op type present in the input.
+ self.op_name_counter = Counter()
# Number of variable names to still generate in parent scope
self.generate_in_parent_scope_left = 0
@@ -77,7 +91,7 @@ def generate_in_parent_scope(self, n):
self.generate_in_parent_scope_left = n
# Generate a substitution name for the given ssa value name.
- def generate_name(self, source_variable_name, use_ssa_name):
+ def generate_name(self, source_variable_name, use_ssa_name, op_name = ""):
# Compute variable name
variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else ''
@@ -86,8 +100,16 @@ def generate_name(self, source_variable_name, use_ssa_name):
# a FileCHeck substation string. As FileCheck requires these
# strings to start with a character, skip MLIR variables starting
# with a digit (e.g. `%0`).
+ #
+ # The next fallback option is to use the op name, if the
+ # corresponding match succeeds.
+ #
+ # If neither worked, use a generic name: `VAL_#N`.
if use_ssa_name and source_variable_name[0].isalpha():
variable_name = source_variable_name.upper()
+ elif op_name != "":
+ variable_name = op_name.upper() + "_" + str(self.op_name_counter[op_name])
+ self.op_name_counter[op_name] += 1
else:
variable_name = "VAL_" + str(self.name_counter)
self.name_counter += 1
@@ -123,6 +145,7 @@ def num_scopes(self):
def clear_names(self):
self.name_counter = 0
self.used_variable_names = set()
+ self.op_name_counter.clear()
class AttributeNamer:
@@ -170,8 +193,10 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re
# Process the rest that contained an SSA value name.
for chunk in line_chunks:
- m = SSA_RE.match(chunk)
- ssa_name = m.group(0) if m is not None else ''
+ ssa = SSA_RE.match(chunk)
+ op_name_with_dialect = SSA_OP_NAME_RE.search(chunk)
+ ssa_name = ssa.group(0) if ssa is not None else ''
+ op_name = op_name_with_dialect.group(1) if op_name_with_dialect is not None else ''
# Check if an existing variable exists for this name.
variable = None
@@ -185,7 +210,8 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re
output_line += "%[[" + variable + "]]"
else:
# Otherwise, generate a new variable.
- variable = variable_namer.generate_name(ssa_name, use_ssa_name)
+ variable = variable_namer.generate_name(ssa_name, use_ssa_name,
+ op_name)
if strict_name_re:
# Use stricter regexp for the variable name, if requested.
# Greedy matching may cause issues with the generic '.*'
``````````
</details>
https://github.com/llvm/llvm-project/pull/160820
More information about the Mlir-commits
mailing list