[llvm] update_test_checks: keep meta variables stable by default (PR #76748)
Jannik Silvanus via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 03:25:54 PST 2024
Nicolai Hähnle <nicolai.haehnle at amd.com>,
Nicolai Hähnle <nicolai.haehnle at amd.com>,
Nicolai Hähnle <nicolai.haehnle at amd.com>,
Nicolai Hähnle <nicolai.haehnle at amd.com>,
Nicolai Hähnle <nicolai.haehnle at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/76748 at github.com>
================
@@ -1187,20 +1233,317 @@ def may_clash_with_default_check_prefix_name(check_prefix, var):
)
+def find_diff_matching(lhs: List[str], rhs: List[str]) -> List[int]:
+    """
+    Find a large ordered matching between strings in lhs and rhs.
+
+    Think of this as finding the *unchanged* lines in a diff, where the entries
+    of lhs and rhs are lines of the files being diffed.
+
+    Returns a list of matched (lhs_idx, rhs_idx) pairs.
+    """
+    # NOTE(review): the annotation says List[int], but the function returns
+    # (lhs_idx, rhs_idx) pairs — should probably be List[Tuple[int, int]].
+
+    # Collect matches in reverse order; reversed once at the end, because
+    # recurse() discovers matches by walking backlinks from the last match.
+    matches = []
+
+    def recurse(lhs_start, lhs_end, rhs_start, rhs_end):
+        if lhs_start == lhs_end or rhs_start == rhs_end:
+            return
+
+        # First, collect a set of candidate matching edges. We limit this to a
+        # constant multiple of the input size to avoid quadratic runtime.
+        patterns = collections.defaultdict(lambda: ([], []))
+
+        # patterns maps line text -> ([lhs indices], [rhs indices]).
+        for idx in range(lhs_start, lhs_end):
+            patterns[lhs[idx]][0].append(idx)
+        for idx in range(rhs_start, rhs_end):
+            patterns[rhs[idx]][1].append(idx)
+
+        # Lines that occur more than once on either side; handled after the
+        # unique lines, cheapest (fewest edges) first, under a budget below.
+        multiple_patterns = []
+
+        candidates = []
+        for pattern in patterns.values():
+            if not pattern[0] or not pattern[1]:
+                continue
+
+            if len(pattern[0]) == len(pattern[1]) == 1:
+                candidates.append((pattern[0][0], pattern[1][0]))
+            else:
+                multiple_patterns.append(pattern)
+
+        multiple_patterns.sort(key=lambda pattern: len(pattern[0]) * len(pattern[1]))
+
+        for pattern in multiple_patterns:
+            if len(candidates) + len(pattern[0]) * len(pattern[1]) > 2 * (len(lhs) + len(rhs)):
+                break
+            for lhs_idx in pattern[0]:
+                for rhs_idx in pattern[1]:
+                    candidates.append((lhs_idx, rhs_idx))
+
+        if not candidates:
+            # The LHS and RHS either share nothing in common, or every shared
+            # line is so heavily duplicated that no candidate edges fit within
+            # the budget above. In that case, give up and match nothing.
+            return
+
+        # Compute a maximal crossing-free matching via an algorithm that is
+        # inspired by a mixture of dynamic programming and line-sweeping in
+        # discrete geometry.
+        #
+        # I would be surprised if this algorithm didn't exist somewhere in the
+        # literature, but I found it without consciously recalling any
+        # references, so you'll have to make do with the explanation below.
+        # Sorry.
+        #
+        # The underlying graph is bipartite:
+        # - nodes on the LHS represent lines in the original check
+        # - nodes on the RHS represent lines in the new (updated) check
+        #
+        # Nodes are implicitly sorted by the corresponding line number.
+        # Edges (unique_matches) are sorted by the line number on the LHS.
+        #
+        # Here's the geometric intuition for the algorithm.
+        #
+        # * Plot the edges as points in the plane, with the original line
+        #   number on the X axis and the updated line number on the Y axis.
+        # * The goal is to find a longest "chain" of points where each point
+        #   is strictly above and to the right of the previous point.
+        # * The algorithm proceeds by sweeping a vertical line from left to
+        #   right.
+        # * The algorithm maintains a table where `table[N]` answers the
+        #   question "What is currently the 'best' way to build a chain of N+1
+        #   points to the left of the vertical line". Here, 'best' means
+        #   that the last point of the chain is a as low as possible (minimal
+        #   Y coordinate).
+        # * `table[N]` is `(y, point_idx)` where `point_idx` is the index of
+        #   the last point in the chain and `y` is its Y coordinate
+        # * A key invariant is that the Y values in the table are
+        #   monotonically increasing
+        # * Thanks to these properties, the table can be used to answer the
+        #   question "What is the longest chain that can be built to the left
+        #   of the vertical line using only points below a certain Y value",
+        #   using a binary search over the table.
+        # * The algorithm also builds a backlink structure in which every point
+        #   links back to the previous point on a best (longest) chain ending
+        #   at that point
+        #
+        # The core loop of the algorithm sweeps the line and updates the table
+        # and backlink structure for every point that we cross during the sweep.
+        # Therefore, the algorithm is trivially O(M log M) in the number of
+        # points. Since we only consider lines that are unique, it is log-linear
+        # in the problem size.
+        #
+        # Ties on the X coordinate are broken by *descending* Y so that among
+        # edges sharing an lhs point, a chain can use at most one of them
+        # (each next point must be strictly above the previous one).
+        candidates.sort(key=lambda candidate: (candidate[0], -candidate[1]))
+
+        backlinks = []
+        table = []
+        for _, rhs_idx in candidates:
+            candidate_idx = len(backlinks)
+            # NOTE(review): bisect's key= parameter requires Python >= 3.10.
+            ti = bisect.bisect_left(table, rhs_idx, key=lambda entry: entry[0])
+            if ti < len(table):
+                table[ti] = (rhs_idx, candidate_idx)
+            else:
+                table.append((rhs_idx, candidate_idx))
+            if ti > 0:
+                backlinks.append(table[ti - 1][1])
+            else:
+                backlinks.append(None)
+
+        # Commit to the longest chain by walking the backlinks. Recursively
+        # attempt to fill in more matches in-between.
+        previous = (lhs_end, rhs_end)
+        match_idx = table[-1][1]
+        while match_idx is not None:
+            current = candidates[match_idx]
+            recurse(current[0] + 1, previous[0], current[1] + 1, previous[1])
+            matches.append(current)
+            previous = current
+            match_idx = backlinks[match_idx]
+        recurse(lhs_start, previous[0], rhs_start, previous[1])
+
+    recurse(0, len(lhs), 0, len(rhs))
+
+    matches.reverse()
+    return matches
+
+
+# Placeholder substituted for FileCheck variable names when comparing check
+# lines (see CheckLineInfo below).
+VARIABLE_TAG = "[[@@]]"
+# Matches a FileCheck metavariable use such as [[VAR]] or [[VAR:pattern]];
+# group 1 captures the variable name.
+METAVAR_RE = re.compile(r"\[\[([A-Z0-9_]+)(?::[^]]+)?\]\]")
+# Matches the (possibly empty) run of trailing digits of a name.
+NUMERIC_SUFFIX_RE = re.compile(r"[0-9]*$")
+
+
+class CheckValueInfo:
+    """Information about a single FileCheck variable occurrence on a check line."""
+
+    def __init__(
+        self,
+        nameless_value: NamelessValue,
+        var: str,
+        prefix: str,
+    ):
+        # The NamelessValue this occurrence belongs to (declared elsewhere
+        # in this file; not visible in this hunk).
+        self.nameless_value = nameless_value
+        # The FileCheck variable name for this occurrence.
+        self.var = var
+        # Presumably the variable's name prefix (e.g. as used when
+        # numbering variables) — TODO confirm against callers.
+        self.prefix = prefix
+
+
+# Represent a check line in a way that allows us to compare check lines while
+# ignoring some or all of the FileCheck variable names.
+class CheckLineInfo:
+    def __init__(self, line, values):
+        # Line with all FileCheck variable name occurrences replaced by VARIABLE_TAG
+        self.line: str = line
+
+        # Information on each FileCheck variable name occurrence in the line
+        self.values: List[CheckValueInfo] = values
+
+    def __repr__(self):
+        # NOTE(review): this f-string prints a literal "self.values=" prefix
+        # in the output; probably meant values={self.values}.
+        return f"CheckLineInfo(line={self.line}, self.values={self.values})"
+
+
+def remap_metavar_names(
+ old_line_infos: List[CheckLineInfo],
+ new_line_infos: List[CheckLineInfo],
+ committed_names: Set[str],
+) -> Mapping[str, str]:
+ """
+ Map all FileCheck variable names that appear in new_line_infos to new
+ FileCheck variable names in an attempt to reduce the diff from old_line_infos
+ to new_line_infos.
+
+ This is done by:
+ * Matching old check lines and new check lines using a diffing algorithm
+ applied after replacing names with wildcards.
+ * Committing to variable names such that the matched lines become equal
+ (without wildcards) if possible
+ * This is done recursively to handle cases where many lines are equal
+ after wildcard replacement
+ """
+ # Initialize uncommitted identity mappings
+ new_mapping = {}
+ for line in new_line_infos:
+ for value in line.values:
+ new_mapping[value.var] = value.var
+
+ # Recursively commit to the identity mapping or find a better one
+ def recurse(old_begin, old_end, new_begin, new_end):
+ if old_begin == old_end or new_begin == new_end:
+ return
+
+ # Find a matching of lines where uncommitted names are replaced
+ # with a placeholder.
+ def diffify_line(line, mapper):
+ values = []
+ for value in line.values:
+ mapped = mapper(value.var)
+ values.append(mapped if mapped in committed_names else '?')
+ return line.line.strip() + ' @@@ ' + ' @ '.join(values)
+
+ lhs_lines = [
+ diffify_line(line, lambda x: x)
+ for line in old_line_infos[old_begin:old_end]
+ ]
+ rhs_lines = [
+ diffify_line(line, lambda x: new_mapping[x])
+ for line in new_line_infos[new_begin:new_end]
+ ]
+
+ candidate_matches = find_diff_matching(lhs_lines, rhs_lines)
+
+ # Apply commits greedily on a match-by-match basis
+ matches = [(-1,-1)]
+ committed_anything = False
+ for lhs_idx, rhs_idx in candidate_matches:
+ lhs_line = old_line_infos[lhs_idx]
+ rhs_line = new_line_infos[rhs_idx]
+
+ local_commits = {}
+
+ for lhs_value, rhs_value in zip(lhs_line.values, rhs_line.values):
+ if new_mapping[rhs_value.var] in committed_names:
+ # The new value has already been committed. If it was mapped
+ # to the same name as the original value, we can consider
+ # # committing other values from this line. Otherwise, we
----------------
jasilvanus wrote:
Nit: double `#`
https://github.com/llvm/llvm-project/pull/76748
More information about the llvm-commits
mailing list