[llvm] aacea0d - [utils] Add script to generate elaborated IR and assembly tests (#89026)

Wed May 8 23:58:59 PDT 2024

Author: Fangrui Song
Date: 2024-05-08T23:58:55-07:00
New Revision: aacea0d0f67401f5a0b74947f3ff179ade9cbf6d

URL: https://github.com/llvm/llvm-project/commit/aacea0d0f67401f5a0b74947f3ff179ade9cbf6d
DIFF: https://github.com/llvm/llvm-project/commit/aacea0d0f67401f5a0b74947f3ff179ade9cbf6d.diff

LOG: [utils] Add script to generate elaborated IR and assembly tests (#89026)

Generally, IR and assembly test files benefit from being cleaned to
remove unnecessary details. However, for tests requiring elaborate
IR or assembly files where cleanup is less practical (e.g., large amount
of debug information output from Clang), the current practice is to
include the C/C++ source file and the generation instructions as
comments.

This is inconvenient when regeneration is needed. This patch adds
`llvm/utils/update_test_body.py` to allow easier regeneration.

`ld.lld --debug-names` tests (#86508) utilize this script for
Clang-generated assembly tests.

Note: `-o pipefail` is standard (since
https://www.austingroupbugs.net/view.php?id=789) but not supported by
dash.

Link:
https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408

Added: 
    llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
    llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
    llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
    llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
    llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
    llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
    llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
    llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
    llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
    llvm/utils/update_test_body.py

Modified: 
    llvm/docs/TestingGuide.rst
    llvm/test/tools/UpdateTestChecks/lit.local.cfg
    llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
    llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s

Removed: 
    


################################################################################
diff  --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index e32e4d1e535ab..e24feb3bf5fa2 100644

--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -433,6 +433,87 @@ actually participate in the test besides holding the ``RUN:`` lines.
   putting the extra files in an ``Inputs/`` directory. This pattern is
   deprecated.
 
+Elaborated tests
+----------------
+
+Generally, IR and assembly test files benefit from being cleaned to remove
+unnecessary details. However, for tests requiring elaborate IR or assembly
+files where cleanup is less practical (e.g., large amount of debug information
+output from Clang), you can include generation instructions within
+``split-file`` part called ``gen``. Then, run
+``llvm/utils/update_test_body.py`` on the test file to generate the needed
+content.
+
+.. code-block:: none
+
+    ; RUN: rm -rf %t && split-file %s %t && cd %t
+    ; RUN: opt -S a.ll ... | FileCheck %s
+
+    ; CHECK: hello
+
+    ;--- a.cc
+    int va;
+    ;--- gen
+    clang --target=x86_64-linux -S -emit-llvm -g a.cc -o -
+
+    ;--- a.ll
+    # content generated by the script 'gen'
+
+.. code-block:: bash
+
+   PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll
+
+The script will prepare extra files with ``split-file``, invoke ``gen``, and
+then rewrite the part after ``gen`` with its stdout.
+
+For convenience, if the test needs one single assembly file, you can also wrap
+``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you can
+skip ``split-file`` in RUN lines.
+
+.. code-block:: none
+
+    # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+    # RUN: ... | FileCheck %s
+
+    # CHECK: hello
+
+    .ifdef GEN
+    #--- a.cc
+    int va;
+    #--- gen
+    clang --target=x86_64-linux -S -g a.cc -o -
+    .endif
+    # content generated by the script 'gen'
+
+.. note::
+
+  Consider specifying an explicit target triple to avoid differences when
+  regeneration is needed on another machine.
+
+  ``gen`` is invoked with ``PWD`` set to ``/proc/self/cwd``. Clang commands
+  don't need ``-fdebug-compilation-dir=`` since its default value is ``PWD``.
+
+  Check prefixes should be placed before ``.endif`` since the part after
+  ``.endif`` is replaced.
+
+If the test body contains multiple files, you can print ``---`` separators and
+utilize ``split-file`` in ``RUN`` lines.
+
+.. code-block:: none
+
+    # RUN: rm -rf %t && split-file %s %t && cd %t
+    ...
+
+    #--- a.cc
+    int va;
+    #--- b.cc
+    int vb;
+    #--- gen
+    clang --target=x86_64-linux -S -O1 -g a.cc -o -
+    echo '#--- b.s'
+    clang --target=x86_64-linux -S -O1 -g b.cc -o -
+    #--- a.s
+
 Fragile tests
 -------------
 

diff  --git a/llvm/test/tools/UpdateTestChecks/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
index f8ab6b82cde70..2e695490b005e 100644
--- a/llvm/test/tools/UpdateTestChecks/lit.local.cfg
+++ b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
@@ -19,7 +19,8 @@ def add_update_script_substition(
     # Specify an explicit default version in UTC tests, so that the --version
     # embedded in UTC_ARGS does not change in all test expectations every time
     # the default is bumped.
-    extra_args += " --version=1"
+    if name != "%update_test_body":
+        extra_args += " --version=1"
     config.substitutions.append(
         (name, "'%s' %s %s" % (python_exe, script_path, extra_args))
     )
@@ -47,3 +48,7 @@ if os.path.isfile(llvm_mca_path):
     config.available_features.add("llvm-mca-binary")
     mca_arg = "--llvm-mca-binary " + shell_quote(llvm_mca_path)
     add_update_script_substition("%update_test_checks", extra_args=mca_arg)
+
+split_file_path = os.path.join(config.llvm_tools_dir, "split-file")
+if os.path.isfile(split_file_path):
+    add_update_script_substition("%update_test_body")

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
new file mode 100644
index 0000000000000..05024d8799cde
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
+.long 0
+.long 1

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
new file mode 100644
index 0000000000000..80a2676d0a752
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
@@ -0,0 +1,16 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
+@a = global i32 0
+;--- b.ll
+@b = global i32 0

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
new file mode 100644
index 0000000000000..3e82a3ffab9ac
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
new file mode 100644
index 0000000000000..d99946e2bd92c
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
@@ -0,0 +1,13 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
new file mode 100644
index 0000000000000..9ea9c7bc7ac90
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+# RUN: diff -u %t %s
+
+# CHECK: stdout is empty; forgot -o - ?
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+true
+.endif

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
new file mode 100644
index 0000000000000..c12f22adceb2e
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
@@ -0,0 +1,7 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' does not exist
+
+.ifdef GEN
+#--- a.txt
+.endif

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
new file mode 100644
index 0000000000000..7e1a9365df14b
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK:      log
+# CHECK-NEXT: 'gen' failed
+
+.ifdef GEN
+#--- gen
+echo log >&2
+false  # gen fails due to sh -e
+true
+.endif

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
new file mode 100644
index 0000000000000..c0026939e414d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
@@ -0,0 +1,8 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' should be followed by another part (---) or .endif
+
+#--- a.txt
+.long 0
+#--- gen
+cat a.txt

diff  --git a/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
new file mode 100644
index 0000000000000..1bb2464ad957c
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
@@ -0,0 +1,4 @@
+import platform
+
+if platform.system() == "Windows":  # NOTE(review): presumably because update_test_body.py runs 'gen' via `sh` with PWD=/proc/self/cwd - confirm
+    config.unsupported = True

diff  --git a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
index d0f8857c638f8..5b3cdfc977902 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
@@ -1,15 +1,3 @@
-# Source:
-#   struct e {
-#     enum {} f[16384];
-#     short g;
-#   };
-#   e foo() {
-#     auto E = new e;
-#     return *E;
-#   }
-# Compile with:
-#   clang -O2 -gdwarf-4 -S a.cpp -o a4.s
-
 # RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o %t.o
 # RUN: llvm-dwarfdump -debug-info -name g %t.o | FileCheck %s
 
@@ -17,6 +5,20 @@
 # CHECK: DW_AT_name ("g")
 # CHECK: DW_AT_data_member_location    (0x4000)
 
+.ifdef GEN
+#--- a.cpp
+struct e {
+  enum {} f[16384];
+  short g;
+};
+e foo() {
+  auto E = new e;
+  return *E;
+}
+#--- gen
+clang --target=x86_64-apple-macosx -O2 -gdwarf-4 -S a.cpp -o -
+.endif
+
 	.section	__TEXT,__text,regular,pure_instructions
 	.macosx_version_min 10, 14
 	.globl	__Z3foov                ## -- Begin function _Z3foov

diff  --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
index e8bb951750873..81d15cd2be224 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
@@ -1,16 +1,16 @@
 # RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \
 # RUN:   | llvm-dwarfdump - | FileCheck %s
 
-# Generated from:
-#
-#   struct t1 { };
-#   t1 v1;
-#
-# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.5.split.s -gdwarf-5 -g
-
 # CHECK: DW_TAG_variable
 # CHECK:   DW_AT_type ({{.*}} "t1")
 
+.ifdef GEN
+#--- test.cpp
+struct t1 { };
+t1 v1;
+#--- gen
+clang++ --target=x86_64-linux -S -g -fdebug-types-section -gsplit-dwarf -gdwarf-5 test.cpp -o -
+.endif
 	.text
 	.file	"test.cpp"
 	.section	.debug_types.dwo,"e",@progbits

diff  --git a/llvm/utils/update_test_body.py b/llvm/utils/update_test_body.py
new file mode 100755
index 0000000000000..661b0270d783b
--- /dev/null
+++ b/llvm/utils/update_test_body.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""Generate test body using split-file and a custom script.
+
+The script will prepare extra files with `split-file`, invoke `gen`, and then
+rewrite the part after `gen` with its stdout.
+
+https://llvm.org/docs/TestingGuide.html#elaborated-tests
+
+Example:
+PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
+"""
+import argparse
+import contextlib
+import os
+import re
+import subprocess
+import sys
+import tempfile
+
+
+@contextlib.contextmanager
+def cd(directory):
+    """Temporarily change the current working directory to `directory`.
+
+    The working directory that was current on entry is restored when the
+    `with` block exits, including when the body raises.
+    """
+    cwd = os.getcwd()
+    os.chdir(directory)
+    try:
+        yield
+    finally:
+        os.chdir(cwd)
+
+
+def process(args, path):
+    """Regenerate the body of the test file at `path` from its `gen` part.
+
+    The file is read up to and including the line that terminates the `gen`
+    part: either the next `---` part separator or a line starting with
+    `.endif`. Those lines are fed to `split-file` to materialize the parts in
+    a temporary directory, `gen` is run there with `sh -eu`, and everything
+    after the terminating line in `path` is replaced with the stdout of `gen`.
+
+    If `args.shell` is set, a shell is started in the temporary directory
+    instead of running `gen`, and `path` is left untouched.
+
+    Returns 0 on success and a non-zero status on failure; diagnostics are
+    written to stderr.
+    """
+    prolog = []
+    seen_gen = False
+    with open(path) as f:
+        for line in f:
+            line = line.rstrip()
+            prolog.append(line)
+            if (seen_gen and re.match(r"(.|//)---", line)) or line.startswith(".endif"):
+                break
+            if re.match(r"(.|//)--- gen", line):
+                seen_gen = True
+        else:
+            # Reached the end of the file without a terminating line.
+            print(
+                "'gen' should be followed by another part (---) or .endif",
+                file=sys.stderr,
+            )
+            return 1
+
+    if not seen_gen:
+        print("'gen' does not exist", file=sys.stderr)
+        return 1
+    with tempfile.TemporaryDirectory(prefix="update_test_body_") as tmpdir:
+        # If the last line starts with ".endif", remove it so that it is not
+        # written into the last split-file part.
+        parts = prolog[:-1] if prolog[-1].startswith(".endif") else prolog
+        try:
+            subprocess.run(
+                ["split-file", "-", tmpdir],
+                input="\n".join(parts).encode(),
+                capture_output=True,
+                check=True,
+            )
+        except subprocess.CalledProcessError as ex:
+            sys.stderr.write(ex.stderr.decode())
+            return 1
+        with cd(tmpdir):
+            if args.shell:
+                print(f"invoke shell in the temporary directory '{tmpdir}'")
+                subprocess.run([os.environ.get("SHELL", "sh")])
+                return 0
+
+            sub = subprocess.run(
+                ["sh", "-eu", "gen"],
+                capture_output=True,
+                # Don't encode the directory information to the Clang output.
+                # Remove unneeded details (.ident) as well.
+                env=dict(
+                    os.environ,
+                    CCC_OVERRIDE_OPTIONS="#^-fno-ident",
+                    PWD="/proc/self/cwd",
+                ),
+            )
+            sys.stderr.write(sub.stderr.decode())
+            if sub.returncode != 0:
+                print("'gen' failed", file=sys.stderr)
+                return sub.returncode
+            if not sub.stdout:
+                print("stdout is empty; forgot -o - ?", file=sys.stderr)
+                return 1
+            content = sub.stdout.decode()
+
+    with open(path, "w") as f:
+        # Print lines up to '.endif'.
+        print("\n".join(prolog), file=f)
+        # Then print the stdout of 'gen'.
+        f.write(content)
+    # Report success explicitly. Falling off the end would return None, which
+    # a caller comparing the result with `!= 0` would mistake for a failure.
+    return 0
+
+
+# Command-line driver: process each given test file in order and stop at the
+# first one that fails, exiting with its status.
+parser = argparse.ArgumentParser(
+    description="Generate test body using split-file and a custom script"
+)
+parser.add_argument("files", nargs="+")
+parser.add_argument(
+    "--shell", action="store_true", help="invoke shell instead of 'gen'"
+)
+args = parser.parse_args()
+for path in args.files:
+    retcode = process(args, path)
+    # A falsy result (0 or None) means success. Comparing with `!= 0` would
+    # treat None as a failure and stop after the first file while still
+    # exiting with status 0, silently skipping the remaining files.
+    if retcode:
+        sys.exit(retcode)