[llvm] 597ac47 - update_test_checks: match IR basic block labels (#88979)

via llvm-commits llvm-commits at lists.llvm.org
Sat May 18 16:39:52 PDT 2024


Author: Nicolai Hähnle
Date: 2024-05-19T01:39:47+02:00
New Revision: 597ac471cc7da97ccf957362a7e9f7a52d6910ee

URL: https://github.com/llvm/llvm-project/commit/597ac471cc7da97ccf957362a7e9f7a52d6910ee
DIFF: https://github.com/llvm/llvm-project/commit/597ac471cc7da97ccf957362a7e9f7a52d6910ee.diff

LOG: update_test_checks: match IR basic block labels (#88979)

Labels are matched using a regexp of the form '^(pattern):', which
requires the addition of a "suffix" concept to NamelessValue.

Aside from that, the key challenge is that block labels are values, and
we typically capture values including the prefix '%'. However, when
labels appear at the start of a basic block, the prefix '%' is not
included, so we must capture block label values *without* the prefix
'%'.

We don't know ahead of time whether an IR value is a label or not. In
most cases, they are prefixed by the word "label" (their type), but this
isn't the case in phi nodes. We solve this issue by leveraging the
two-phase nature of variable generalization: the first pass finds all
occurrences of a variable and determines whether the '%' prefix can be
included or not. The second pass does the actual substitution.

This change also unifies the generalization path for assembly with that
for IR and analysis, in the hope that any future changes avoid diverging
those cases in the future.

I also considered the alternative of trying to detect the phi node case
using more regular expression special cases but ultimately decided
against that because it seemed more fragile, and perhaps the approach of
keeping a tentative prefix that may later be discarded could also be
eventually applied to some metadata and attribute cases.

Note that an early version of this change was reviewed as
https://reviews.llvm.org/D142452, before version numbers were
introduced. This is a substantially updated version of that change.

Added: 
    

Modified: 
    llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected
    llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test
    llvm/utils/UpdateTestChecks/asm.py
    llvm/utils/UpdateTestChecks/common.py
    llvm/utils/UpdateTestChecks/isel.py
    llvm/utils/update_analyze_test_checks.py
    llvm/utils/update_cc_test_checks.py
    llvm/utils/update_llc_test_checks.py
    llvm/utils/update_test_checks.py

Removed: 
    


################################################################################
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected
index 1d21ebe547f68..5e70a6c89d327 100644
--- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected
@@ -1,15 +1,15 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -S | FileCheck %s
 
 define i32 @phi_after_label(i1 %cc) {
 ; CHECK-LABEL: define i32 @phi_after_label(
 ; CHECK-SAME: i1 [[CC:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[CC]], label [[THEN:%.*]], label [[END:%.*]]
-; CHECK:       then:
-; CHECK-NEXT:    br label [[END]]
-; CHECK:       end:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[THEN]] ]
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[CC]], label %[[THEN:.*]], label %[[END:.*]]
+; CHECK:       [[THEN]]:
+; CHECK-NEXT:    br label %[[END]]
+; CHECK:       [[END]]:
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[THEN]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -26,14 +26,14 @@ end:
 define void @phi_before_label(i32 %bound) {
 ; CHECK-LABEL: define void @phi_before_label(
 ; CHECK-SAME: i32 [[BOUND:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CTR_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[CTR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[CTR_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[CTR_NEXT]] = add i32 [[CTR]], 1
 ; CHECK-NEXT:    [[CC:%.*]] = icmp ult i32 [[CTR_NEXT]], [[BOUND]]
-; CHECK-NEXT:    br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
-; CHECK:       end:
+; CHECK-NEXT:    br i1 [[CC]], label %[[LOOP]], label %[[END:.*]]
+; CHECK:       [[END]]:
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -52,11 +52,11 @@ end:
 define i32 @phi_after_label_unnamed(i1 %cc) {
 ; CHECK-LABEL: define i32 @phi_after_label_unnamed(
 ; CHECK-SAME: i1 [[CC:%.*]]) {
-; CHECK-NEXT:    br i1 [[CC]], label [[TMP1:%.*]], label [[TMP2:%.*]]
-; CHECK:       1:
-; CHECK-NEXT:    br label [[TMP2]]
-; CHECK:       2:
-; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ 1, [[TMP1]] ]
+; CHECK-NEXT:    br i1 [[CC]], label %[[BB1:.*]], label %[[BB2:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    br label %[[BB2]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ 1, %[[BB1]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 0:

diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test
index 411c84de1dcba..2b0d0cb7f54ba 100644
--- a/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test
+++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test
@@ -1,4 +1,4 @@
-# RUN: cp -f %S/Inputs/phi-labels.ll %t.ll && %update_test_checks --version 4 %t.ll
+# RUN: cp -f %S/Inputs/phi-labels.ll %t.ll && %update_test_checks --version 5 %t.ll
 # RUN: diff -u %t.ll %S/Inputs/phi-labels.ll.expected
 ## Check that running the script again does not change the result:
 # RUN: %update_test_checks %t.ll

diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py
index f0c456a1648df..33ede81a41601 100644
--- a/llvm/utils/UpdateTestChecks/asm.py
+++ b/llvm/utils/UpdateTestChecks/asm.py
@@ -605,6 +605,7 @@ def add_checks(
     prefix_list,
     func_dict,
     func_name,
+    ginfo: common.GeneralizerInfo,
     global_vars_seen_dict,
     is_filtered,
 ):
@@ -617,9 +618,7 @@ def add_checks(
         func_dict,
         func_name,
         check_label_format,
-        True,
-        False,
-        1,
+        ginfo,
         global_vars_seen_dict,
         is_filtered=is_filtered,
     )

diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 5595e6f417555..7da16e0f0cb2e 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -30,8 +30,9 @@
    in case arguments are split to a separate SAME line.
 4: --check-globals now has a third option ('smart'). The others are now called
    'none' and 'all'. 'smart' is the default.
+5: Basic block labels are matched by FileCheck expressions
 """
-DEFAULT_VERSION = 4
+DEFAULT_VERSION = 5
 
 
 SUPPORTED_ANALYSES = {
@@ -698,6 +699,7 @@ def __init__(
         args_and_sig,
         attrs,
         func_name_separator,
+        ginfo,
     ):
         self.scrub = string
         self.extrascrub = extra
@@ -705,24 +707,27 @@ def __init__(
         self.args_and_sig = args_and_sig
         self.attrs = attrs
         self.func_name_separator = func_name_separator
+        self._ginfo = ginfo
 
     def is_same_except_arg_names(
-        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs, is_backend
+        self, extrascrub, funcdef_attrs_and_ret, args_and_sig, attrs
     ):
         arg_names = set()
 
         def drop_arg_names(match):
-            arg_names.add(match.group(variable_group_in_ir_value_match))
-            if match.group(attribute_group_in_ir_value_match):
-                attr = match.group(attribute_group_in_ir_value_match)
+            nameless_value = self._ginfo.get_nameless_value_from_match(match)
+            if nameless_value.check_key == "%":
+                arg_names.add(self._ginfo.get_name_from_match(match))
+                substitute = ""
             else:
-                attr = ""
-            return match.group(1) + attr + match.group(match.lastindex)
+                substitute = match.group(2)
+            return match.group(1) + substitute + match.group(match.lastindex)
 
         def repl_arg_names(match):
+            nameless_value = self._ginfo.get_nameless_value_from_match(match)
             if (
-                match.group(variable_group_in_ir_value_match) is not None
-                and match.group(variable_group_in_ir_value_match) in arg_names
+                nameless_value.check_key == "%"
+                and self._ginfo.get_name_from_match(match) in arg_names
             ):
                 return match.group(1) + match.group(match.lastindex)
             return match.group(1) + match.group(2) + match.group(match.lastindex)
@@ -731,17 +736,19 @@ def repl_arg_names(match):
             return False
         if self.attrs != attrs:
             return False
-        ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
-        ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
+
+        regexp = self._ginfo.get_regexp()
+        ans0 = regexp.sub(drop_arg_names, self.args_and_sig)
+        ans1 = regexp.sub(drop_arg_names, args_and_sig)
         if ans0 != ans1:
             return False
-        if is_backend:
+        if self._ginfo.is_asm():
             # Check without replacements, the replacements are not applied to the
             # body for backend checks.
             return self.extrascrub == extrascrub
 
-        es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
-        es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
+        es0 = regexp.sub(repl_arg_names, self.extrascrub)
+        es1 = regexp.sub(repl_arg_names, extrascrub)
         es0 = SCRUB_IR_COMMENT_RE.sub(r"", es0)
         es1 = SCRUB_IR_COMMENT_RE.sub(r"", es1)
         return es0 == es1
@@ -751,7 +758,7 @@ def __str__(self):
 
 
 class FunctionTestBuilder:
-    def __init__(self, run_list, flags, scrubber_args, path):
+    def __init__(self, run_list, flags, scrubber_args, path, ginfo):
         self._verbose = flags.verbose
         self._record_args = flags.function_signature
         self._check_attributes = flags.check_attributes
@@ -770,6 +777,7 @@ def __init__(self, run_list, flags, scrubber_args, path):
         )
         self._scrubber_args = scrubber_args
         self._path = path
+        self._ginfo = ginfo
         # Strip double-quotes if input was read by UTC_ARGS
         self._replace_value_regex = list(
             map(lambda x: x.strip('"'), flags.replace_value_regex)
@@ -804,10 +812,10 @@ def global_var_dict(self):
     def is_filtered(self):
         return bool(self._filters)
 
-    def process_run_line(
-        self, function_re, scrubber, raw_tool_output, prefixes, is_backend
-    ):
-        build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
+    def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
+        build_global_values_dictionary(
+            self._global_var_dict, raw_tool_output, prefixes, self._ginfo
+        )
         for m in function_re.finditer(raw_tool_output):
             if not m:
                 continue
@@ -817,7 +825,7 @@ def process_run_line(
             # beginning of assembly function definition. In most assemblies, that is just a
             # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
             # False, just assume that separator is an empty string.
-            if is_backend:
+            if self._ginfo.is_asm():
                 # Use ':' as default separator.
                 func_name_separator = (
                     m.group("func_name_separator")
@@ -900,7 +908,6 @@ def process_run_line(
                             funcdef_attrs_and_ret,
                             args_and_sig,
                             attrs,
-                            is_backend,
                         ):
                             self._func_dict[prefix][func].scrub = scrubbed_extra
                             self._func_dict[prefix][func].args_and_sig = args_and_sig
@@ -919,6 +926,7 @@ def process_run_line(
                             args_and_sig,
                             attrs,
                             func_name_separator,
+                            self._ginfo,
                         )
                         self._func_order[prefix].append(func)
                     else:
@@ -959,6 +967,12 @@ def get_failed_prefixes(self):
 
 
 class NamelessValue:
+    """
+    A NamelessValue object represents a type of value in the IR whose "name" we
+    generalize in the generated check lines; where the "name" could be an actual
+    name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
+    or `!4`).
+    """
     def __init__(
         self,
         check_prefix,
@@ -971,12 +985,14 @@ def __init__(
         is_number=False,
         replace_number_with_counter=False,
         match_literally=False,
-        interlaced_with_previous=False
+        interlaced_with_previous=False,
+        ir_suffix=r"",
     ):
         self.check_prefix = check_prefix
         self.check_key = check_key
         self.ir_prefix = ir_prefix
         self.ir_regexp = ir_regexp
+        self.ir_suffix = ir_suffix
         self.global_ir_rhs_regexp = global_ir_rhs_regexp
         self.is_before_functions = is_before_functions
         self.is_number = is_number
@@ -987,15 +1003,10 @@ def __init__(
         self.interlaced_with_previous = interlaced_with_previous
         self.variable_mapping = {}
 
-    # Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
+    # Return true if this kind of IR value is defined "locally" to functions,
+    # which we assume is only the case precisely for LLVM IR local values.
     def is_local_def_ir_value(self):
-        return self.ir_prefix == "%"
-
-    # Return the IR prefix and check prefix we use for this kind or IR value,
-    # e.g., (%, TMP) for locals. If the IR prefix is a regex, return the prefix
-    # used in the IR output
-    def get_ir_prefix_from_ir_value_match(self, match):
-        return re.search(self.ir_prefix, match[0])[0], self.check_prefix
+        return self.check_key == "%"
 
     # Return the IR regexp we use for this kind or IR value, e.g., [\w.-]+? for locals
     def get_ir_regex(self):
@@ -1030,205 +1041,216 @@ def get_value_name(self, var: str, check_prefix: str):
         var = var.replace("-", "_")
         return var.upper()
 
-    # Create a FileCheck variable from regex.
-    def get_value_definition(self, var, match):
-        # for backwards compatibility we check locals with '.*'
-        varname = self.get_value_name(var, self.check_prefix)
-        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
-        if self.is_number:
-            regex = ""  # always capture a number in the default format
-            capture_start = "[[#"
-        else:
-            regex = self.get_ir_regex()
-            capture_start = "[["
-        if self.is_local_def_ir_value():
-            return capture_start + varname + ":" + prefix + regex + "]]"
-        return prefix + capture_start + varname + ":" + regex + "]]"
-
-    # Use a FileCheck variable.
-    def get_value_use(self, var, match, var_prefix=None):
-        if var_prefix is None:
-            var_prefix = self.check_prefix
-        capture_start = "[[#" if self.is_number else "[["
-        if self.is_local_def_ir_value():
-            return capture_start + self.get_value_name(var, var_prefix) + "]]"
-        prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
-        return prefix + capture_start + self.get_value_name(var, var_prefix) + "]]"
-
-
-# Description of the different "unnamed" values we match in the IR, e.g.,
-# (local) ssa values, (debug) metadata, etc.
-ir_nameless_values = [
-    #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
-    NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
-    NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
-    NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
-    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
-    NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
-    NamelessValue(
-        r"GLOBNAMED",
-        "@",
-        r"@",
-        r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
-        r".+",
-        is_before_functions=True,
-        match_literally=True,
-        interlaced_with_previous=True,
-    ),
-    NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
-    NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
-    NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
-    NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
-    NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
-    NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
-    NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
-    NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
-    NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
-    NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
-    NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
-]
+    def get_affixes_from_match(self, match: re.Match):
+        prefix = re.match(self.ir_prefix, match.group(2)).group(0)
+        suffix = re.search(self.ir_suffix + "$", match.group(2)).group(0)
+        return prefix, suffix
 
-global_nameless_values = [
-    nameless_value
-    for nameless_value in ir_nameless_values
-    if nameless_value.global_ir_rhs_regexp is not None
-]
-# global variable names should be matched literally
-global_nameless_values_w_unstable_ids = [
-    nameless_value
-    for nameless_value in global_nameless_values
-    if not nameless_value.match_literally
-]
 
-asm_nameless_values = [
-    NamelessValue(
-        r"MCINST",
-        "Inst#",
-        "<MCInst #",
-        r"\d+",
-        r".+",
-        is_number=True,
-        replace_number_with_counter=True,
-    ),
-    NamelessValue(
-        r"MCREG",
-        "Reg:",
-        "<MCOperand Reg:",
-        r"\d+",
-        r".+",
-        is_number=True,
-        replace_number_with_counter=True,
-    ),
-]
+class GeneralizerInfo:
+    """
+    A GeneralizerInfo object holds information about how check lines should be generalized
+    (e.g., variable names replaced by FileCheck meta variables) as well as per-test-file
+    state (e.g. information about IR global variables).
+    """
 
-analyze_nameless_values = [
-    NamelessValue(
-        r"GRP",
-        "#",
-        r"",
-        r"0x[0-9a-f]+",
-        None,
-        replace_number_with_counter=True,
-    ),
-]
+    MODE_IR = 0
+    MODE_ASM = 1
+    MODE_ANALYZE = 2
 
+    def __init__(
+        self,
+        version,
+        mode,
+        nameless_values: List[NamelessValue],
+        regexp_prefix,
+        regexp_suffix,
+    ):
+        self._version = version
+        self._mode = mode
+        self._nameless_values = nameless_values
+
+        self._regexp_prefix = regexp_prefix
+        self._regexp_suffix = regexp_suffix
+
+        self._regexp, _ = self._build_regexp(False, False)
+        (
+            self._unstable_globals_regexp,
+            self._unstable_globals_values,
+        ) = self._build_regexp(True, True)
+
+    def _build_regexp(self, globals_only, unstable_only):
+        matches = []
+        values = []
+        for nameless_value in self._nameless_values:
+            is_global = nameless_value.global_ir_rhs_regexp is not None
+            if globals_only and not is_global:
+                continue
+            if unstable_only and nameless_value.match_literally:
+                continue
 
-def createOrRegexp(old, new):
-    if not old:
-        return new
-    if not new:
-        return old
-    return old + "|" + new
-
-
-def createPrefixMatch(prefix_str, prefix_re):
-    return "(?:" + prefix_str + "(" + prefix_re + "))"
-
-
-# Build the regexp that matches an "IR value". This can be a local variable,
-# argument, global, or metadata, anything that is "named". It is important that
-# the PREFIX and SUFFIX below only contain a single group, if that changes
-# other locations will need adjustment as well.
-IR_VALUE_REGEXP_PREFIX = r"(\s*)"
-IR_VALUE_REGEXP_STRING = r""
-for nameless_value in ir_nameless_values:
-    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
-    if nameless_value.global_ir_rhs_regexp is not None:
-        match = "^" + match
-    IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
-IR_VALUE_REGEXP_SUFFIX = r"([,\s\(\)\}]|\Z)"
-IR_VALUE_RE = re.compile(
-    IR_VALUE_REGEXP_PREFIX
-    + r"("
-    + IR_VALUE_REGEXP_STRING
-    + r")"
-    + IR_VALUE_REGEXP_SUFFIX
-)
+            match = f"(?:{nameless_value.ir_prefix}({nameless_value.ir_regexp}){nameless_value.ir_suffix})"
+            if self.is_ir() and not globals_only and is_global:
+                match = "^" + match
+            matches.append(match)
+            values.append(nameless_value)
 
-GLOBAL_VALUE_REGEXP_STRING = r""
-for nameless_value in global_nameless_values_w_unstable_ids:
-    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
-    GLOBAL_VALUE_REGEXP_STRING = createOrRegexp(GLOBAL_VALUE_REGEXP_STRING, match)
-GLOBAL_VALUE_RE = re.compile(
-    IR_VALUE_REGEXP_PREFIX
-    + r"("
-    + GLOBAL_VALUE_REGEXP_STRING
-    + r")"
-    + IR_VALUE_REGEXP_SUFFIX
-)
+        regexp_string = r"|".join(matches)
 
-# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
-ASM_VALUE_REGEXP_STRING = ""
-for nameless_value in asm_nameless_values:
-    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
-    ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
-ASM_VALUE_REGEXP_SUFFIX = r"([>\s]|\Z)"
-ASM_VALUE_RE = re.compile(
-    r"((?:#|//)\s*)" + "(" + ASM_VALUE_REGEXP_STRING + ")" + ASM_VALUE_REGEXP_SUFFIX
-)
+        return (
+            re.compile(
+                self._regexp_prefix + r"(" + regexp_string + r")" + self._regexp_suffix
+            ),
+            values,
+        )
 
-ANALYZE_VALUE_REGEXP_PREFIX = r"(\s*)"
-ANALYZE_VALUE_REGEXP_STRING = r""
-for nameless_value in analyze_nameless_values:
-    match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
-    ANALYZE_VALUE_REGEXP_STRING = createOrRegexp(ANALYZE_VALUE_REGEXP_STRING, match)
-ANALYZE_VALUE_REGEXP_SUFFIX = r"(\)?:)"
-ANALYZE_VALUE_RE = re.compile(
-    ANALYZE_VALUE_REGEXP_PREFIX
-    + r"("
-    + ANALYZE_VALUE_REGEXP_STRING
-    + r")"
-    + ANALYZE_VALUE_REGEXP_SUFFIX
-)
+    def get_version(self):
+        return self._version
 
-# The entire match is group 0, the prefix has one group (=1), the entire
-# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
-first_nameless_group_in_ir_value_match = 3
+    def is_ir(self):
+        return self._mode == GeneralizerInfo.MODE_IR
 
-# constants for the group id of special matches
-variable_group_in_ir_value_match = 3
-attribute_group_in_ir_value_match = 4
+    def is_asm(self):
+        return self._mode == GeneralizerInfo.MODE_ASM
 
+    def is_analyze(self):
+        return self._mode == GeneralizerInfo.MODE_ANALYZE
 
-# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
-# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
-def get_idx_from_ir_value_match(match):
-    for i in range(first_nameless_group_in_ir_value_match, match.lastindex):
-        if match.group(i) is not None:
-            return i - first_nameless_group_in_ir_value_match
-    error("Unable to identify the kind of IR value from the match!")
-    return 0
+    def get_nameless_values(self):
+        return self._nameless_values
 
+    def get_regexp(self):
+        return self._regexp
 
-# See get_idx_from_ir_value_match
-def get_name_from_ir_value_match(match):
-    return match.group(
-        get_idx_from_ir_value_match(match) + first_nameless_group_in_ir_value_match
-    )
+    def get_unstable_globals_regexp(self):
+        return self._unstable_globals_regexp
 
+    # The entire match is group 0, the prefix has one group (=1), the entire
+    # IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
+    FIRST_NAMELESS_GROUP_IN_MATCH = 3
 
-def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
-    return nameless_values[get_idx_from_ir_value_match(match)]
+    def get_match_info(self, match):
+        """
+        Returns (name, nameless_value) for the given match object
+        """
+        if match.re == self._regexp:
+            values = self._nameless_values
+        else:
+            match.re == self._unstable_globals_regexp
+            values = self._unstable_globals_values
+        for i in range(len(values)):
+            g = match.group(i + GeneralizerInfo.FIRST_NAMELESS_GROUP_IN_MATCH)
+            if g is not None:
+                return g, values[i]
+        error("Unable to identify the kind of IR value from the match!")
+        return None, None
+
+    # See get_idx_from_match
+    def get_name_from_match(self, match):
+        return self.get_match_info(match)[0]
+
+    def get_nameless_value_from_match(self, match) -> NamelessValue:
+        return self.get_match_info(match)[1]
+
+
+def make_ir_generalizer(version):
+    values = []
+
+    if version >= 5:
+        values += [
+            NamelessValue(r"BB", "%", r"label %", r"[\w$.-]+?", None),
+            NamelessValue(r"BB", "%", r"^", r"[\w$.-]+?", None, ir_suffix=r":"),
+        ]
+
+    values += [
+        #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
+        NamelessValue(r"TMP", "%", r"%", r"[\w$.-]+?", None),
+        NamelessValue(r"ATTR", "#", r"#", r"[0-9]+", None),
+        NamelessValue(r"ATTR", "#", r"attributes #", r"[0-9]+", r"{[^}]*}"),
+        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", None),
+        NamelessValue(r"GLOB", "@", r"@", r"[0-9]+", r".+", is_before_functions=True),
+        NamelessValue(
+            r"GLOBNAMED",
+            "@",
+            r"@",
+            r"[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*",
+            r".+",
+            is_before_functions=True,
+            match_literally=True,
+            interlaced_with_previous=True,
+        ),
+        NamelessValue(r"DBG", "!", r"!dbg ", r"![0-9]+", None),
+        NamelessValue(r"DIASSIGNID", "!", r"!DIAssignID ", r"![0-9]+", None),
+        NamelessValue(r"PROF", "!", r"!prof ", r"![0-9]+", None),
+        NamelessValue(r"TBAA", "!", r"!tbaa ", r"![0-9]+", None),
+        NamelessValue(r"TBAA_STRUCT", "!", r"!tbaa.struct ", r"![0-9]+", None),
+        NamelessValue(r"RNG", "!", r"!range ", r"![0-9]+", None),
+        NamelessValue(r"LOOP", "!", r"!llvm.loop ", r"![0-9]+", None),
+        NamelessValue(r"META", "!", r"", r"![0-9]+", r"(?:distinct |)!.*"),
+        NamelessValue(r"ACC_GRP", "!", r"!llvm.access.group ", r"![0-9]+", None),
+        NamelessValue(r"META", "!", r"![a-z.]+ ", r"![0-9]+", None),
+        NamelessValue(r"META", "!", r"[, (]", r"![0-9]+", None),
+    ]
+
+    prefix = r"(\s*)"
+    suffix = r"([,\s\(\)\}]|\Z)"
+
+    # values = [
+    #     nameless_value
+    #     for nameless_value in IR_NAMELESS_VALUES
+    #     if not (globals_only and nameless_value.global_ir_rhs_regexp is None) and
+    #        not (unstable_ids_only and nameless_value.match_literally)
+    # ]
+
+    return GeneralizerInfo(version, GeneralizerInfo.MODE_IR, values, prefix, suffix)
+
+
+def make_asm_generalizer(version):
+    values = [
+        NamelessValue(
+            r"MCINST",
+            "Inst#",
+            "<MCInst #",
+            r"\d+",
+            r".+",
+            is_number=True,
+            replace_number_with_counter=True,
+        ),
+        NamelessValue(
+            r"MCREG",
+            "Reg:",
+            "<MCOperand Reg:",
+            r"\d+",
+            r".+",
+            is_number=True,
+            replace_number_with_counter=True,
+        ),
+    ]
+
+    prefix = r"((?:#|//)\s*)"
+    suffix = r"([>\s]|\Z)"
+
+    return GeneralizerInfo(version, GeneralizerInfo.MODE_ASM, values, prefix, suffix)
+
+
+def make_analyze_generalizer(version):
+    values = [
+        NamelessValue(
+            r"GRP",
+            "#",
+            r"",
+            r"0x[0-9a-f]+",
+            None,
+            replace_number_with_counter=True,
+        ),
+    ]
+
+    prefix = r"(\s*)"
+    suffix = r"(\)?:)"
+
+    return GeneralizerInfo(
+        version, GeneralizerInfo.MODE_ANALYZE, values, prefix, suffix
+    )
 
 
 # Return true if var clashes with the scripted FileCheck check_prefix.
@@ -1385,16 +1407,68 @@ def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[tuple]:
 NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
 
 
+class TestVar:
+    def __init__(self, nameless_value: NamelessValue, prefix: str, suffix: str):
+        self._nameless_value = nameless_value
+
+        self._prefix = prefix
+        self._suffix = suffix
+
+    def seen(self, nameless_value: NamelessValue, prefix: str, suffix: str):
+        if prefix != self._prefix:
+            self._prefix = ""
+        if suffix != self._suffix:
+            self._suffix = ""
+
+    def get_variable_name(self, text):
+        return self._nameless_value.get_value_name(
+            text, self._nameless_value.check_prefix
+        )
+
+    def get_def(self, name, prefix, suffix):
+        if self._nameless_value.is_number:
+            return f"{prefix}[[#{name}:]]{suffix}"
+        if self._prefix:
+            assert self._prefix == prefix
+            prefix = ""
+        if self._suffix:
+            assert self._suffix == suffix
+            suffix = ""
+        return f"{prefix}[[{name}:{self._prefix}{self._nameless_value.get_ir_regex()}{self._suffix}]]{suffix}"
+
+    def get_use(self, name, prefix, suffix):
+        if self._nameless_value.is_number:
+            return f"{prefix}[[#{name}]]{suffix}"
+        if self._prefix:
+            assert self._prefix == prefix
+            prefix = ""
+        if self._suffix:
+            assert self._suffix == suffix
+            suffix = ""
+        return f"{prefix}[[{name}]]{suffix}"
+
+
 class CheckValueInfo:
     def __init__(
         self,
-        nameless_value: NamelessValue,
-        var: str,
+        key,
+        text,
+        name: str,
         prefix: str,
+        suffix: str,
     ):
-        self.nameless_value = nameless_value
-        self.var = var
+        # Key for the value, e.g. '%'
+        self.key = key
+
+        # Text to be matched by the FileCheck variable (without any prefix or suffix)
+        self.text = text
+
+        # Name of the FileCheck variable
+        self.name = name
+
+        # Prefix and suffix that were captured by the NamelessValue regular expression
         self.prefix = prefix
+        self.suffix = suffix
 
 
 # Represent a check line in a way that allows us to compare check lines while
@@ -1433,7 +1507,7 @@ def remap_metavar_names(
     new_mapping = {}
     for line in new_line_infos:
         for value in line.values:
-            new_mapping[value.var] = value.var
+            new_mapping[value.name] = value.name
 
     # Recursively commit to the identity mapping or find a better one
     def recurse(old_begin, old_end, new_begin, new_end):
@@ -1445,7 +1519,7 @@ def recurse(old_begin, old_end, new_begin, new_end):
         def diffify_line(line, mapper):
             values = []
             for value in line.values:
-                mapped = mapper(value.var)
+                mapped = mapper(value.name)
                 values.append(mapped if mapped in committed_names else "?")
             return line.line.strip() + " @@@ " + " @ ".join(values)
 
@@ -1470,29 +1544,29 @@ def diffify_line(line, mapper):
             local_commits = {}
 
             for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
-                if new_mapping[rhs_value.var] in committed_names:
+                if new_mapping[rhs_value.name] in committed_names:
                     # The new value has already been committed. If it was mapped
                     # to the same name as the original value, we can consider
                     # committing other values from this line. Otherwise, we
                     # should ignore this line.
-                    if new_mapping[rhs_value.var] == lhs_value.var:
+                    if new_mapping[rhs_value.name] == lhs_value.name:
                         continue
                     else:
                         break
 
-                if rhs_value.var in local_commits:
+                if rhs_value.name in local_commits:
                     # Same, but for a possible commit happening on the same line
-                    if local_commits[rhs_value.var] == lhs_value.var:
+                    if local_commits[rhs_value.name] == lhs_value.name:
                         continue
                     else:
                         break
 
-                if lhs_value.var in committed_names:
+                if lhs_value.name in committed_names:
                     # We can't map this value because the name we would map it to has already been
                     # committed for something else. Give up on this line.
                     break
 
-                local_commits[rhs_value.var] = lhs_value.var
+                local_commits[rhs_value.name] = lhs_value.name
             else:
                 # No reason not to add any commitments for this line
                 for rhs_var, lhs_var in local_commits.items():
@@ -1545,58 +1619,26 @@ def diffify_line(line, mapper):
     return new_mapping
 
 
-def generalize_check_lines_common(
+def generalize_check_lines(
     lines,
-    is_analyze,
+    ginfo: GeneralizerInfo,
     vars_seen,
     global_vars_seen,
-    nameless_values,
-    nameless_value_regex,
-    is_asm,
-    preserve_names,
+    preserve_names=False,
     original_check_lines=None,
+    *,
+    unstable_globals_only=False,
 ):
-    # This gets called for each match that occurs in
-    # a line. We transform variables we haven't seen
-    # into defs, and variables we have seen into uses.
-    def transform_line_vars(match, transform_locals=True):
-        var = get_name_from_ir_value_match(match)
-        nameless_value = get_nameless_value_from_match(match, nameless_values)
-        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
-            warn(
-                "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
-                " with scripted FileCheck name." % (var,)
-            )
-        key = (var, nameless_value.check_key)
-        is_local_def = nameless_value.is_local_def_ir_value()
-        if is_local_def and not transform_locals:
-            return None
-        if is_local_def and key in vars_seen:
-            rv = nameless_value.get_value_use(var, match)
-        elif not is_local_def and key in global_vars_seen:
-            # We could have seen a different prefix for the global variables first,
-            # ensure we use that one instead of the prefix for the current match.
-            rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
-        else:
-            if is_local_def:
-                vars_seen.add(key)
-            else:
-                global_vars_seen[key] = nameless_value.check_prefix
-            rv = nameless_value.get_value_definition(var, match)
-        # re.sub replaces the entire regex match
-        # with whatever you return, so we have
-        # to make sure to hand it back everything
-        # including the commas and spaces.
-        return match.group(1) + rv + match.group(match.lastindex)
-
-    def transform_non_local_line_vars(match):
-        return transform_line_vars(match, False)
+    if unstable_globals_only:
+        regexp = ginfo.get_unstable_globals_regexp()
+    else:
+        regexp = ginfo.get_regexp()
 
     multiple_braces_re = re.compile(r"({{+)|(}}+)")
     def escape_braces(match_obj):
         return '{{' + re.escape(match_obj.group(0)) + '}}'
 
-    if not is_asm and not is_analyze:
+    if ginfo.is_ir():
         for i, line in enumerate(lines):
             # An IR variable named '%.' matches the FileCheck regex string.
             line = line.replace("%.", "%dot")
@@ -1617,123 +1659,141 @@ def escape_braces(match_obj):
             lines[i] = scrubbed_line
 
     if not preserve_names:
-        if is_asm:
-            for i, _ in enumerate(lines):
-                # It can happen that two matches are back-to-back and for some reason sub
-                # will not replace both of them. For now we work around this by
-                # substituting until there is no more match.
-                changed = True
-                while changed:
-                    (lines[i], changed) = nameless_value_regex.subn(
-                        transform_line_vars, lines[i], count=1
-                    )
-        else:
-            # LLVM IR case. Start by handling global meta variables (global IR variables,
-            # metadata, attributes)
-            for i, _ in enumerate(lines):
-                start = 0
-                while True:
-                    m = nameless_value_regex.search(lines[i][start:])
-                    if m is None:
-                        break
-                    start += m.start()
-                    sub = transform_non_local_line_vars(m)
-                    if sub is not None:
-                        lines[i] = (
-                            lines[i][:start] + sub + lines[i][start + len(m.group(0)) :]
-                        )
-                    start += 1
-
-            # Collect information about new check lines and original check lines (if any)
-            new_line_infos = []
-            for line in lines:
-                filtered_line = ""
-                values = []
-                while True:
-                    m = nameless_value_regex.search(line)
-                    if m is None:
-                        filtered_line += line
-                        break
+        committed_names = set(
+            test_var.get_variable_name(name)
+            for (name, _), test_var in vars_seen.items()
+        )
+        defs = set()
 
-                    var = get_name_from_ir_value_match(m)
-                    nameless_value = get_nameless_value_from_match(m, nameless_values)
-                    var = nameless_value.get_value_name(
-                        var, nameless_value.check_prefix
-                    )
+        # Collect information about new check lines, and generalize global reference
+        new_line_infos = []
+        for line in lines:
+            filtered_line = ""
+            values = []
+            while True:
+                m = regexp.search(line)
+                if m is None:
+                    filtered_line += line
+                    break
 
-                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
-                    filtered_line += (
-                        line[: m.start()]
-                        + m.group(1)
-                        + VARIABLE_TAG
-                        + m.group(m.lastindex)
+                name = ginfo.get_name_from_match(m)
+                nameless_value = ginfo.get_nameless_value_from_match(m)
+                prefix, suffix = nameless_value.get_affixes_from_match(m)
+                if may_clash_with_default_check_prefix_name(
+                    nameless_value.check_prefix, name
+                ):
+                    warn(
+                        "Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
+                        " with scripted FileCheck name." % (name,)
                     )
-                    line = line[m.end() :]
-                    values.append(
-                        CheckValueInfo(
-                            nameless_value=nameless_value,
-                            var=var,
-                            prefix=nameless_value.get_ir_prefix_from_ir_value_match(m)[
-                                0
-                            ],
-                        )
+
+                # Record the variable as seen and (for locals) accumulate
+                # prefixes/suffixes
+                is_local_def = nameless_value.is_local_def_ir_value()
+                if is_local_def:
+                    vars_dict = vars_seen
+                else:
+                    vars_dict = global_vars_seen
+
+                key = (name, nameless_value.check_key)
+
+                if is_local_def:
+                    test_prefix = prefix
+                    test_suffix = suffix
+                else:
+                    test_prefix = ""
+                    test_suffix = ""
+
+                if key in vars_dict:
+                    vars_dict[key].seen(nameless_value, test_prefix, test_suffix)
+                else:
+                    vars_dict[key] = TestVar(nameless_value, test_prefix, test_suffix)
+                    defs.add(key)
+
+                var = vars_dict[key].get_variable_name(name)
+
+                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
+                filtered_line += (
+                    line[: m.start()] + m.group(1) + VARIABLE_TAG + m.group(m.lastindex)
+                )
+                line = line[m.end() :]
+
+                values.append(
+                    CheckValueInfo(
+                        key=nameless_value.check_key,
+                        text=name,
+                        name=var,
+                        prefix=prefix,
+                        suffix=suffix,
                     )
-                new_line_infos.append(CheckLineInfo(filtered_line, values))
-
-            orig_line_infos = []
-            for line in original_check_lines or []:
-                filtered_line = ""
-                values = []
-                while True:
-                    m = METAVAR_RE.search(line)
-                    if m is None:
-                        filtered_line += line
-                        break
+                )
 
-                    # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
-                    filtered_line += line[: m.start()] + VARIABLE_TAG
-                    line = line[m.end() :]
-                    values.append(
-                        CheckValueInfo(
-                            nameless_value=None,
-                            var=m.group(1),
-                            prefix=None,
-                        )
+            new_line_infos.append(CheckLineInfo(filtered_line, values))
+
+        committed_names.update(
+            test_var.get_variable_name(name)
+            for (name, _), test_var in global_vars_seen.items()
+        )
+
+        # Collect information about original check lines, if any.
+        orig_line_infos = []
+        for line in original_check_lines or []:
+            filtered_line = ""
+            values = []
+            while True:
+                m = METAVAR_RE.search(line)
+                if m is None:
+                    filtered_line += line
+                    break
+
+                # Replace with a [[@@]] tag, but be sure to keep the spaces and commas.
+                filtered_line += line[: m.start()] + VARIABLE_TAG
+                line = line[m.end() :]
+                values.append(
+                    CheckValueInfo(
+                        key=None,
+                        text=None,
+                        name=m.group(1),
+                        prefix="",
+                        suffix="",
                     )
-                orig_line_infos.append(CheckLineInfo(filtered_line, values))
+                )
+            orig_line_infos.append(CheckLineInfo(filtered_line, values))
 
-            # Compute the variable name mapping
-            committed_names = set(vars_seen)
+        # Compute the variable name mapping
+        mapping = remap_metavar_names(orig_line_infos, new_line_infos, committed_names)
 
-            mapping = remap_metavar_names(
-                orig_line_infos, new_line_infos, committed_names
-            )
+        # Apply the variable name mapping
+        for i, line_info in enumerate(new_line_infos):
+            line_template = line_info.line
+            line = ""
 
-            for i, line_info in enumerate(new_line_infos):
-                line_template = line_info.line
-                line = ""
+            for value in line_info.values:
+                idx = line_template.find(VARIABLE_TAG)
+                line += line_template[:idx]
+                line_template = line_template[idx + len(VARIABLE_TAG) :]
 
-                for value in line_info.values:
-                    idx = line_template.find(VARIABLE_TAG)
-                    line += line_template[:idx]
-                    line_template = line_template[idx + len(VARIABLE_TAG) :]
+                key = (value.text, value.key)
+                if value.key == "%":
+                    vars_dict = vars_seen
+                else:
+                    vars_dict = global_vars_seen
 
-                    key = (mapping[value.var], nameless_value.check_key)
-                    is_local_def = nameless_value.is_local_def_ir_value()
-                    if is_local_def:
-                        if mapping[value.var] in vars_seen:
-                            line += f"[[{mapping[value.var]}]]"
-                        else:
-                            line += f"[[{mapping[value.var]}:{value.prefix}{value.nameless_value.get_ir_regex()}]]"
-                            vars_seen.add(mapping[value.var])
-                    else:
-                        raise RuntimeError("not implemented")
+                if key in defs:
+                    line += vars_dict[key].get_def(
+                        mapping[value.name], value.prefix, value.suffix
+                    )
+                    defs.remove(key)
+                else:
+                    line += vars_dict[key].get_use(
+                        mapping[value.name], value.prefix, value.suffix
+                    )
 
-                line += line_template
+            line += line_template
 
-                lines[i] = line
+            lines[i] = line
 
-    if is_analyze:
+    if ginfo.is_analyze():
         for i, _ in enumerate(lines):
             # Escape multiple {{ or }} as {{}} denotes a FileCheck regex.
             scrubbed_line = multiple_braces_re.sub(escape_braces, lines[i])
@@ -1742,63 +1802,6 @@ def escape_braces(match_obj):
     return lines
 
 
-# Replace IR value defs and uses with FileCheck variables.
-def generalize_check_lines(
-    lines, is_analyze, vars_seen, global_vars_seen, preserve_names, original_check_lines
-):
-    return generalize_check_lines_common(
-        lines,
-        is_analyze,
-        vars_seen,
-        global_vars_seen,
-        ir_nameless_values,
-        IR_VALUE_RE,
-        False,
-        preserve_names,
-        original_check_lines=original_check_lines,
-    )
-
-
-def generalize_global_check_line(line, preserve_names, global_vars_seen):
-    [new_line] = generalize_check_lines_common(
-        [line],
-        False,
-        set(),
-        global_vars_seen,
-        global_nameless_values_w_unstable_ids,
-        GLOBAL_VALUE_RE,
-        False,
-        preserve_names,
-    )
-    return new_line
-
-
-def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
-    return generalize_check_lines_common(
-        lines,
-        False,
-        vars_seen,
-        global_vars_seen,
-        asm_nameless_values,
-        ASM_VALUE_RE,
-        True,
-        False,
-    )
-
-
-def generalize_analyze_check_lines(lines, vars_seen, global_vars_seen):
-    return generalize_check_lines_common(
-        lines,
-        True,
-        vars_seen,
-        global_vars_seen,
-        analyze_nameless_values,
-        ANALYZE_VALUE_RE,
-        False,
-        False,
-    )
-
-
 def add_checks(
     output_lines,
     comment_marker,
@@ -1806,9 +1809,7 @@ def add_checks(
     func_dict,
     func_name,
     check_label_format,
-    is_backend,
-    is_analyze,
-    version,
+    ginfo,
     global_vars_seen_dict,
     is_filtered,
     preserve_names=False,
@@ -1853,7 +1854,7 @@ def add_checks(
 
             # Add some space between different check prefixes, but not after the last
             # check line (before the test code).
-            if is_backend:
+            if ginfo.is_asm():
                 if len(printed_prefixes) != 0:
                     output_lines.append(comment_marker)
 
@@ -1862,11 +1863,11 @@ def add_checks(
 
             global_vars_seen_before = [key for key in global_vars_seen.keys()]
 
-            vars_seen = set()
+            vars_seen = {}
             printed_prefixes.append(checkprefix)
             attrs = str(func_dict[checkprefix][func_name].attrs)
             attrs = "" if attrs == "None" else attrs
-            if version > 1:
+            if ginfo.get_version() > 1:
                 funcdef_attrs_and_ret = func_dict[checkprefix][
                     func_name
                 ].funcdef_attrs_and_ret
@@ -1881,7 +1882,7 @@ def add_checks(
             if args_and_sig:
                 args_and_sig = generalize_check_lines(
                     [args_and_sig],
-                    is_analyze,
+                    ginfo,
                     vars_seen,
                     global_vars_seen,
                     preserve_names,
@@ -1892,7 +1893,7 @@ def add_checks(
                 # Captures in label lines are not supported, thus split into a -LABEL
                 # and a separate -SAME line that contains the arguments with captures.
                 args_and_sig_prefix = ""
-                if version >= 3 and args_and_sig.startswith("("):
+                if ginfo.get_version() >= 3 and args_and_sig.startswith("("):
                     # Ensure the "(" separating function name and arguments is in the
                     # label line. This is required in case of function names that are
                     # prefixes of each other. Otherwise, the label line for "foo" might
@@ -1933,7 +1934,7 @@ def add_checks(
                 continue
 
             # For ASM output, just emit the check lines.
-            if is_backend:
+            if ginfo.is_asm():
                 body_start = 1
                 if is_filtered:
                     # For filtered output we don't add "-NEXT" so don't add extra spaces
@@ -1943,8 +1944,8 @@ def add_checks(
                     output_lines.append(
                         "%s %s:       %s" % (comment_marker, checkprefix, func_body[0])
                     )
-                func_lines = generalize_asm_check_lines(
-                    func_body[body_start:], vars_seen, global_vars_seen
+                func_lines = generalize_check_lines(
+                    func_body[body_start:], ginfo, vars_seen, global_vars_seen
                 )
                 for func_line in func_lines:
                     if func_line.strip() == "":
@@ -1963,9 +1964,9 @@ def add_checks(
                         global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
                 break
             # For analyze output, generalize the output, and emit CHECK-EMPTY lines as well.
-            elif is_analyze:
-                func_body = generalize_analyze_check_lines(
-                    func_body, vars_seen, global_vars_seen
+            elif ginfo.is_analyze():
+                func_body = generalize_check_lines(
+                    func_body, ginfo, vars_seen, global_vars_seen
                 )
                 for func_line in func_body:
                     if func_line.strip() == "":
@@ -1994,7 +1995,7 @@ def add_checks(
             else:
                 func_body = generalize_check_lines(
                     func_body,
-                    False,
+                    ginfo,
                     vars_seen,
                     global_vars_seen,
                     preserve_names,
@@ -2057,13 +2058,14 @@ def add_ir_checks(
     func_name,
     preserve_names,
     function_sig,
-    version,
+    ginfo: GeneralizerInfo,
     global_vars_seen_dict,
     is_filtered,
     original_check_lines={},
 ):
+    assert ginfo.is_ir()
     # Label format is based on IR string.
-    if function_sig and version > 1:
+    if function_sig and ginfo.get_version() > 1:
         function_def_regex = "define %s"
     elif function_sig:
         function_def_regex = "define {{[^@]+}}%s"
@@ -2079,9 +2081,7 @@ def add_ir_checks(
         func_dict,
         func_name,
         check_label_format,
-        False,
-        False,
-        version,
+        ginfo,
         global_vars_seen_dict,
         is_filtered,
         preserve_names,
@@ -2090,8 +2090,15 @@ def add_ir_checks(
 
 
 def add_analyze_checks(
-    output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered
+    output_lines,
+    comment_marker,
+    prefix_list,
+    func_dict,
+    func_name,
+    ginfo: GeneralizerInfo,
+    is_filtered,
 ):
+    assert ginfo.is_analyze()
     check_label_format = "{} %s-LABEL: '%s%s%s%s'".format(comment_marker)
     global_vars_seen_dict = {}
     return add_checks(
@@ -2101,16 +2108,14 @@ def add_analyze_checks(
         func_dict,
         func_name,
         check_label_format,
-        False,
-        True,
-        1,
+        ginfo,
         global_vars_seen_dict,
         is_filtered,
     )
 
 
-def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
-    for nameless_value in itertools.chain(global_nameless_values, asm_nameless_values):
+def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes, ginfo):
+    for nameless_value in ginfo.get_nameless_values():
         if nameless_value.global_ir_rhs_regexp is None:
             continue
 
@@ -2225,6 +2230,7 @@ def add_global_checks(
     comment_marker,
     prefix_list,
     output_lines,
+    ginfo: GeneralizerInfo,
     global_vars_seen_dict,
     preserve_names,
     is_before_functions,
@@ -2232,7 +2238,9 @@ def add_global_checks(
 ):
     printed_prefixes = set()
     output_lines_loc = {}  # Allows GLOB and GLOBNAMED to be sorted correctly
-    for nameless_value in global_nameless_values:
+    for nameless_value in ginfo.get_nameless_values():
+        if nameless_value.global_ir_rhs_regexp is None:
+            continue
         if nameless_value.is_before_functions != is_before_functions:
             continue
         for p in prefix_list:
@@ -2274,8 +2282,13 @@ def add_global_checks(
                                 break
                         if not matched:
                             continue
-                    new_line = generalize_global_check_line(
-                        line, preserve_names, global_vars_seen
+                    [new_line] = generalize_check_lines(
+                        [line],
+                        ginfo,
+                        {},
+                        global_vars_seen,
+                        preserve_names,
+                        unstable_globals_only=True,
                     )
                     new_line = filter_unstable_metadata(new_line)
                     check_line = "%s %s: %s" % (comment_marker, checkprefix, new_line)

diff --git a/llvm/utils/UpdateTestChecks/isel.py b/llvm/utils/UpdateTestChecks/isel.py
index bdb68e5815a33..855bc50b09f4e 100644
--- a/llvm/utils/UpdateTestChecks/isel.py
+++ b/llvm/utils/UpdateTestChecks/isel.py
@@ -60,6 +60,7 @@ def add_checks(
     prefix_list,
     func_dict,
     func_name,
+    ginfo: common.GeneralizerInfo,
     global_vars_seen_dict,
     is_filtered,
 ):
@@ -72,9 +73,7 @@ def add_checks(
         func_dict,
         func_name,
         check_label_format,
-        True,
-        False,
-        1,
+        ginfo,
         global_vars_seen_dict,
         is_filtered=is_filtered,
     )

diff --git a/llvm/utils/update_analyze_test_checks.py b/llvm/utils/update_analyze_test_checks.py
index 03053e5447d11..47506626a0a58 100755
--- a/llvm/utils/update_analyze_test_checks.py
+++ b/llvm/utils/update_analyze_test_checks.py
@@ -96,6 +96,7 @@ def main():
             # now, we just ignore all but the last.
             prefix_list.append((check_prefixes, tool_cmd_args))
 
+        ginfo = common.make_analyze_generalizer(version=1)
         builder = common.FunctionTestBuilder(
             run_list=prefix_list,
             flags=type(
@@ -111,6 +112,7 @@ def main():
             ),
             scrubber_args=[],
             path=ti.path,
+            ginfo=ginfo,
         )
 
         for prefixes, opt_args in prefix_list:
@@ -131,7 +133,6 @@ def main():
                         common.scrub_body,
                         raw_tool_output,
                         prefixes,
-                        False,
                     )
             elif re.search(r"LV: Checking a loop in ", raw_tool_outputs) is not None:
                 # Split analysis outputs by "Printing analysis " declarations.
@@ -143,7 +144,6 @@ def main():
                         common.scrub_body,
                         raw_tool_output,
                         prefixes,
-                        False,
                     )
             else:
                 common.warn("Don't know how to deal with this output")
@@ -179,6 +179,7 @@ def main():
                         prefix_list,
                         func_dict,
                         func_name,
+                        ginfo,
                         is_filtered=builder.is_filtered(),
                     )
                 )

diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py
index 28c6bb0409f3a..3ffb07ddf6ad8 100755
--- a/llvm/utils/update_cc_test_checks.py
+++ b/llvm/utils/update_cc_test_checks.py
@@ -270,7 +270,7 @@ def get_function_body(builder, args, filename, clang_args, extra_commands, prefi
             raw_tool_output = common.invoke_tool(extra_args[0], extra_args[1:], f.name)
     if "-emit-llvm" in clang_args:
         builder.process_run_line(
-            common.OPT_FUNCTION_RE, common.scrub_body, raw_tool_output, prefixes, False
+            common.OPT_FUNCTION_RE, common.scrub_body, raw_tool_output, prefixes
         )
         builder.processed_prefixes(prefixes)
     else:
@@ -360,8 +360,13 @@ def main():
 
         # Store only filechecked runlines.
         filecheck_run_list = [i for i in run_list if i[0]]
+        ginfo = common.make_ir_generalizer(version=ti.args.version)
         builder = common.FunctionTestBuilder(
-            run_list=filecheck_run_list, flags=ti.args, scrubber_args=[], path=ti.path
+            run_list=filecheck_run_list,
+            flags=ti.args,
+            scrubber_args=[],
+            path=ti.path,
+            ginfo=ginfo,
         )
 
         for prefixes, args, extra_commands, triple_in_cmd in run_list:
@@ -415,29 +420,18 @@ def main():
 
             # Now generate all the checks.
             def check_generator(my_output_lines, prefixes, func):
-                if "-emit-llvm" in clang_args:
-                    return common.add_ir_checks(
-                        my_output_lines,
-                        "//",
-                        prefixes,
-                        func_dict,
-                        func,
-                        False,
-                        ti.args.function_signature,
-                        ti.args.version,
-                        global_vars_seen_dict,
-                        is_filtered=builder.is_filtered(),
-                    )
-                else:
-                    return asm.add_checks(
-                        my_output_lines,
-                        "//",
-                        prefixes,
-                        func_dict,
-                        func,
-                        global_vars_seen_dict,
-                        is_filtered=builder.is_filtered(),
-                    )
+                return common.add_ir_checks(
+                    my_output_lines,
+                    "//",
+                    prefixes,
+                    func_dict,
+                    func,
+                    False,
+                    ti.args.function_signature,
+                    ginfo,
+                    global_vars_seen_dict,
+                    is_filtered=builder.is_filtered(),
+                )
 
             if ti.args.check_globals != 'none':
                 generated_prefixes.extend(
@@ -446,6 +440,7 @@ def check_generator(my_output_lines, prefixes, func):
                         "//",
                         run_list,
                         output_lines,
+                        ginfo,
                         global_vars_seen_dict,
                         False,
                         True,
@@ -506,6 +501,7 @@ def check_generator(my_output_lines, prefixes, func):
                                         "//",
                                         run_list,
                                         output_lines,
+                                        ginfo,
                                         global_vars_seen_dict,
                                         False,
                                         True,
@@ -525,7 +521,7 @@ def check_generator(my_output_lines, prefixes, func):
                                     mangled,
                                     False,
                                     args.function_signature,
-                                    args.version,
+                                    ginfo,
                                     global_vars_seen_dict,
                                     is_filtered=builder.is_filtered(),
                                 )
@@ -543,6 +539,7 @@ def check_generator(my_output_lines, prefixes, func):
                     "//",
                     run_list,
                     output_lines,
+                    ginfo,
                     global_vars_seen_dict,
                     False,
                     False,

diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py
index 1ed0132781e2b..c8598e74a1345 100755
--- a/llvm/utils/update_llc_test_checks.py
+++ b/llvm/utils/update_llc_test_checks.py
@@ -133,6 +133,7 @@ def main():
         else:
             check_indent = ""
 
+        ginfo = common.make_asm_generalizer(version=1)
         builder = common.FunctionTestBuilder(
             run_list=run_list,
             flags=type(
@@ -148,6 +149,7 @@ def main():
             ),
             scrubber_args=[ti.args],
             path=ti.path,
+            ginfo=ginfo,
         )
 
         for (
@@ -173,9 +175,7 @@ def main():
                 triple = common.get_triple_from_march(march_in_cmd)
 
             scrubber, function_re = output_type.get_run_handler(triple)
-            builder.process_run_line(
-                function_re, scrubber, raw_tool_output, prefixes, True
-            )
+            builder.process_run_line(function_re, scrubber, raw_tool_output, prefixes)
             builder.processed_prefixes(prefixes)
 
         func_dict = builder.finish_and_get_func_dict()
@@ -218,6 +218,7 @@ def main():
                     prefixes,
                     func_dict,
                     func,
+                    ginfo,
                     global_vars_seen_dict,
                     is_filtered=builder.is_filtered(),
                 ),
@@ -243,6 +244,7 @@ def main():
                             run_list,
                             func_dict,
                             func_name,
+                            ginfo,
                             global_vars_seen_dict,
                             is_filtered=builder.is_filtered(),
                         )

diff  --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py
index 04808ce6bb1c6..16f3e618770b2 100755
--- a/llvm/utils/update_test_checks.py
+++ b/llvm/utils/update_test_checks.py
@@ -147,9 +147,14 @@ def main():
             # now, we just ignore all but the last.
             prefix_list.append((check_prefixes, tool_cmd_args, preprocess_cmd))
 
+        ginfo = common.make_ir_generalizer(ti.args.version)
         global_vars_seen_dict = {}
         builder = common.FunctionTestBuilder(
-            run_list=prefix_list, flags=ti.args, scrubber_args=[], path=ti.path
+            run_list=prefix_list,
+            flags=ti.args,
+            scrubber_args=[],
+            path=ti.path,
+            ginfo=ginfo,
         )
 
         tool_binary = ti.args.tool_binary
@@ -172,7 +177,6 @@ def main():
                 common.scrub_body,
                 raw_tool_output,
                 prefixes,
-                False,
             )
             builder.processed_prefixes(prefixes)
 
@@ -217,6 +221,7 @@ def main():
                         ";",
                         prefix_list,
                         output_lines,
+                        ginfo,
                         global_vars_seen_dict,
                         args.preserve_names,
                         True,
@@ -239,7 +244,7 @@ def main():
                         func,
                         False,
                         args.function_signature,
-                        args.version,
+                        ginfo,
                         global_vars_seen_dict,
                         is_filtered=builder.is_filtered(),
                         original_check_lines=original_check_lines.get(func, {}),
@@ -271,7 +276,7 @@ def main():
                             func_name,
                             args.preserve_names,
                             args.function_signature,
-                            args.version,
+                            ginfo,
                             global_vars_seen_dict,
                             is_filtered=builder.is_filtered(),
                             original_check_lines=original_check_lines.get(
@@ -290,6 +295,7 @@ def main():
                                 ";",
                                 prefix_list,
                                 output_lines,
+                                ginfo,
                                 global_vars_seen_dict,
                                 args.preserve_names,
                                 True,
@@ -337,6 +343,7 @@ def main():
                     ";",
                     prefix_list,
                     output_lines,
+                    ginfo,
                     global_vars_seen_dict,
                     args.preserve_names,
                     False,


        


More information about the llvm-commits mailing list