[llvm] [Utils] Add support for split-file to diff_test_updater (PR #157765)

Thu Sep 11 12:30:16 PDT 2025

================
@@ -1,37 +1,136 @@
 import shutil
+import os
+import shlex
 
 """
 This file provides the `diff_test_updater` function, which is invoked on failed RUN lines when lit is executed with --update-tests.
 It checks whether the failed command is `diff` and, if so, uses heuristics to determine which file is the checked-in reference file and which file is output from the test case.
 The heuristics are currently as follows:
+    - if exactly one file originates from the `split-file` command, that file is the reference file and the other is the output file
     - if exactly one file ends with ".expected" (common pattern in LLVM), that file is the reference file and the other is the output file
     - if exactly one file path contains ".tmp" (e.g. because it contains the expansion of "%t"), that file is the output file and the other is the reference file
 If the command matches one of these patterns the output file content is copied to the reference file to make the test pass.
+If the reference file originated from `split-file`, the output file content is instead copied to the corresponding slice of the test file.
 Otherwise the test is ignored.
 
 Possible improvements:
     - Support stdin patterns like "my_binary %s | diff expected.txt"
-    - Scan RUN lines to see if a file is the source of output from a previous command.
+    - Scan RUN lines to see if a file is the source of output from a previous command (other than `split-file`).
       If it is, then it is not a reference file that can be copied to, regardless of name, since the test will overwrite it anyway.
     - Only update the parts that need updating (based on the diff output). Could help avoid noisy updates when e.g. whitespace changes are ignored.
 """
 
 
-def get_source_and_target(a, b):
+class NormalFileTarget:
+    def __init__(self, target):
+        self.target = target
+
+    def copyFrom(self, source):
+        shutil.copy(source, self.target)
+
+    def __str__(self):
+        return self.target
+
+
+class SplitFileTarget:
+    def __init__(self, slice_start_idx, test_path, lines):
+        self.slice_start_idx = slice_start_idx
+        self.test_path = test_path
+        self.lines = lines
+
+    def copyFrom(self, source):
+        lines_before = self.lines[: self.slice_start_idx + 1]
+        self.lines = self.lines[self.slice_start_idx + 1 :]
+        slice_end_idx = None
+        for i, l in enumerate(self.lines):
+            if SplitFileTarget._get_split_line_path(l) != None:
+                slice_end_idx = i
+                break
+        if slice_end_idx is not None:
+            lines_after = self.lines[slice_end_idx:]
+        else:
+            lines_after = []
+        with open(source, "r") as f:
+            new_lines = lines_before + f.readlines() + lines_after
+        with open(self.test_path, "w") as f:
+            for l in new_lines:
+                f.write(l)
+
+    def __str__(self):
+        return f"slice in {self.test_path}"
+
+    @staticmethod
+    def get_target_dir(commands, test_path):
+        for cmd in commands:
+            split = shlex.split(cmd)
+            if "split-file" not in split:
+                continue
+            start_idx = split.index("split-file")
+            split = split[start_idx:]
+            if len(split) < 3:
+                continue
+            if split[1].strip() != test_path:
+                continue
+            return split[2].strip()
+        return None
+
+    @staticmethod
+    def create(path, commands, test_path, target_dir):
+        filename = path.replace(target_dir, "")
+        if filename.startswith(os.sep):
+            filename = filename[len(os.sep) :]
+        with open(test_path, "r") as f:
+            lines = f.readlines()
+        for i, l in enumerate(lines):
+            p = SplitFileTarget._get_split_line_path(l)
+            if p == filename:
+                idx = i
+                break
+        else:
----------------
jroelofs wrote:

for-else is so cool. love it.

https://github.com/llvm/llvm-project/pull/157765