[llvm] [utils] Add script to generate elaborated assembly tests (PR #89026)

Tue Apr 16 23:36:27 PDT 2024

https://github.com/MaskRay created https://github.com/llvm/llvm-project/pull/89026

Generally, assembly test files benefit from being cleaned to remove
unnecessary details. However, for tests requiring elaborate assembly
files where cleanup is less practical (e.g., large amount of debug
information output from Clang), the current practice is to include the
C/C++ source file and the generation instructions as acomments.

This is inconvenient when regeneration is needed. This patch adds
`llvm/utils/update_test_body.py` to allow easier regeneration.

The impending need is `ld.lld --debug-names` tests where many
Clang-generated assembly tests from Clang are needed.

The script is not called `*asm*` because it may be extended to support
other types of files.


>From 30967cf678e903cff2859a50c24bc5cea2d6f5e7 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Tue, 16 Apr 2024 23:36:17 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5-bogner
---
 llvm/docs/TestingGuide.rst                    | 60 ++++++++++++
 .../test/tools/UpdateTestChecks/lit.local.cfg |  7 +-
 .../Inputs/basic.test.expected                | 13 +++
 .../update_test_body/basic.test               | 11 +++
 .../update_test_body/empty-stdout.test        | 13 +++
 .../update_test_body/gen-absent.test          |  7 ++
 .../update_test_body/gen-fail.test            |  8 ++
 .../update_test_body/lit.local.cfg            |  4 +
 .../tools/llvm-dwarfdump/X86/formclass4.s     | 26 +++---
 .../X86/prettyprint_type_units_split_v5.s     | 14 +--
 llvm/utils/update_test_body.py                | 92 +++++++++++++++++++
 11 files changed, 235 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
 create mode 100755 llvm/utils/update_test_body.py

diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index e32e4d1e535abb..ad8e5827a20b16 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -433,6 +433,66 @@ actually participate in the test besides holding the ``RUN:`` lines.
   putting the extra files in an ``Inputs/`` directory. This pattern is
   deprecated.
 
+Elaborated assembly tests
+-------------------------
+
+Generally, assembly test files benefit from being cleaned to remove unnecessary
+details. However, for tests requiring elaborate assembly files where cleanup is
+less practical (e.g., large amount of debug information output from Clang),
+you can include generation instructions within ``.ifdef GEN`` and ``.endif``
+directives. Then, run ``llvm/utils/update_test_body.py`` on
+the test file to generate the needed content.
+
+.. code-block:: none
+
+    # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+    # RUN: ... | FileCheck %s
+
+    # CHECK: hello
+
+    .ifdef GEN
+    #--- a.cc
+    int va;
+    #--- gen
+    clang --target=x86_64-linux -S -g a.cc -o -
+    .endif
+    # content generated by the script 'gen'
+
+.. code-block:: bash
+
+   PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
+
+The script will prepare extra files with ``split-file``, invoke ``gen``, and
+then rewrite the part after ``.endif`` with its stdout.
+
+.. note::
+
+  Consider specifying an explicit target triple to avoid differences when
+  regeneration is needed on another machine.
+
+  Check prefixes should be placed before ``.endif`` since the part after
+  ``.endif`` is replaced.
+
+If you want to generate extra files, you can print ``#---`` separators and
+utilize ``split-file`` in RUN lines.
+
+.. code-block:: none
+
+    # RUN: rm -rf %t && split-file %s %t && cd %t
+    ...
+
+    .ifdef GEN
+    #--- a.cc
+    int va;
+    #--- b.cc
+    int vb;
+    #--- gen
+    echo '#--- a.s'
+    clang -S -g a.cc -o -
+    echo '#--- b.s'
+    clang -S -g b.cc -o -
+    .endif
+
 Fragile tests
 -------------
 
diff --git a/llvm/test/tools/UpdateTestChecks/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
index f8ab6b82cde70d..2e695490b005e2 100644
--- a/llvm/test/tools/UpdateTestChecks/lit.local.cfg
+++ b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
@@ -19,7 +19,8 @@ def add_update_script_substition(
     # Specify an explicit default version in UTC tests, so that the --version
     # embedded in UTC_ARGS does not change in all test expectations every time
     # the default is bumped.
-    extra_args += " --version=1"
+    if name != "%update_test_body":
+        extra_args += " --version=1"
     config.substitutions.append(
         (name, "'%s' %s %s" % (python_exe, script_path, extra_args))
     )
@@ -47,3 +48,7 @@ if os.path.isfile(llvm_mca_path):
     config.available_features.add("llvm-mca-binary")
     mca_arg = "--llvm-mca-binary " + shell_quote(llvm_mca_path)
     add_update_script_substition("%update_test_checks", extra_args=mca_arg)
+
+split_file_path = os.path.join(config.llvm_tools_dir, "split-file")
+if os.path.isfile(split_file_path):
+    add_update_script_substition("%update_test_body")
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
new file mode 100644
index 00000000000000..f86cde34b65771
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && %update_test_body %t
+# RUN: diff -u %S/Inputs/basic.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
+.long 0
+.long 1
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
new file mode 100644
index 00000000000000..0a7ed4bf189dee
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && %update_test_body %t
+# RUN: diff -u %S/Inputs/basic.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
new file mode 100644
index 00000000000000..86a5d780dcc920
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | FileCheck %s
+# RUN: diff -u %t %s
+
+# CHECK: stdout is empty; forgot -o - ?
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
new file mode 100644
index 00000000000000..6c4aa75c7fa850
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
@@ -0,0 +1,7 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' does not exist
+
+.ifdef GEN
+#--- a.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
new file mode 100644
index 00000000000000..e2fa46e0441ad8
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
@@ -0,0 +1,8 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' failed
+
+.ifdef GEN
+#--- gen
+false
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
new file mode 100644
index 00000000000000..1bb2464ad957c0
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
@@ -0,0 +1,4 @@
+import platform
+
+if platform.system() == "Windows":
+    config.unsupported = True
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
index d0f8857c638f87..9d6f7330069e03 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
@@ -1,15 +1,3 @@
-# Source:
-#   struct e {
-#     enum {} f[16384];
-#     short g;
-#   };
-#   e foo() {
-#     auto E = new e;
-#     return *E;
-#   }
-# Compile with:
-#   clang -O2 -gdwarf-4 -S a.cpp -o a4.s
-
 # RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o %t.o
 # RUN: llvm-dwarfdump -debug-info -name g %t.o | FileCheck %s
 
@@ -17,6 +5,20 @@
 # CHECK: DW_AT_name ("g")
 # CHECK: DW_AT_data_member_location    (0x4000)
 
+.ifdef GEN
+#--- a.cpp
+struct e {
+  enum {} f[16384];
+  short g;
+};
+e foo() {
+  auto E = new e;
+  return *E;
+}
+#--- gen
+clang -O2 -gdwarf-4 -S a.cpp -o -
+.endif
+
 	.section	__TEXT,__text,regular,pure_instructions
 	.macosx_version_min 10, 14
 	.globl	__Z3foov                ## -- Begin function _Z3foov
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
index e8bb9517508732..bf1f68b429c89c 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
@@ -1,16 +1,16 @@
 # RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \
 # RUN:   | llvm-dwarfdump - | FileCheck %s
 
-# Generated from:
-#
-#   struct t1 { };
-#   t1 v1;
-#
-# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.5.split.s -gdwarf-5 -g
-
 # CHECK: DW_TAG_variable
 # CHECK:   DW_AT_type ({{.*}} "t1")
 
+.ifdef GEN
+#--- test.cpp
+struct t1 { };
+t1 v1;
+#--- gen
+clang++ -S -g -fdebug-types-section -gsplit-dwarf -gdwarf-5 test.cpp -o -
+.endif
 	.text
 	.file	"test.cpp"
 	.section	.debug_types.dwo,"e", at progbits
diff --git a/llvm/utils/update_test_body.py b/llvm/utils/update_test_body.py
new file mode 100755
index 00000000000000..60eb6053c729e9
--- /dev/null
+++ b/llvm/utils/update_test_body.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""Generate test body using split-file and a custom script.
+Currently, only assembly files are supported by placing generation instructions
+surrounded by .ifdef GEN/.endif directives.
+
+    .ifdef GEN
+    #--- a.cc
+    int va;
+    #--- gen
+    clang --target=aarch64-linux -S -g a.cc -o -
+    .endif
+    # content generated by the script 'gen'
+
+The script will prepare extra files with `split-file`, invoke `gen`, and then
+rewrite the part after `.endif` with its stdout.
+
+Example:
+PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
+"""
+import contextlib, os, subprocess, sys, tempfile
+
+
+ at contextlib.contextmanager
+def cd(dir):
+    cwd = os.getcwd()
+    os.chdir(dir)
+    try:
+        yield
+    finally:
+        os.chdir(cwd)
+
+
+def process(path):
+    split_file_input = []
+    prolog = []
+    is_split_file_input = False
+    is_prolog = True
+    with open(path) as f:
+        for line in f.readlines():
+            line = line.rstrip()
+            if is_prolog:
+                prolog.append(line)
+            if line.startswith(".endif"):
+                is_split_file_input = is_prolog = False
+            if is_split_file_input:
+                split_file_input.append(line)
+            if line.startswith(".ifdef GEN"):
+                is_split_file_input = True
+
+    if not split_file_input:
+        print("no .ifdef GEN", file=sys.stderr)
+        return
+    if is_split_file_input:
+        print("no .endif", file=sys.stderr)
+        return
+    with tempfile.TemporaryDirectory() as dir:
+        sub = subprocess.run(
+            ["split-file", "-", dir],
+            input="\n".join(split_file_input).encode(),
+            capture_output=True,
+        )
+        if sub.returncode != 0:
+            sys.stderr.write(f"split-file failed\n{sub.stderr.decode()}")
+            return
+        with cd(dir):
+            if not os.path.exists("gen"):
+                print("'gen' does not exist", file=sys.stderr)
+                return
+
+            # Don't encode the directory information to the Clang output.
+            # Remove unneeded details (.ident) as well.
+            env = dict(
+                os.environ, CCC_OVERRIDE_OPTIONS="+-fno-ident", PWD="/proc/self/cwd"
+            )
+            sub = subprocess.run(["sh", "gen"], capture_output=True, env=env)
+            if sub.returncode != 0:
+                sys.stderr.write(f"'gen' failed\n{sub.stderr.decode()}")
+                return
+            if not sub.stdout:
+                print("stdout is empty; forgot -o - ?", file=sys.stderr)
+                return
+            content = sub.stdout.decode()
+
+    with open(path, "w") as f:
+        # Print lines up to '.endif'.
+        print("\n".join(prolog), file=f)
+        # Then print the stdout of 'gen'.
+        f.write(content)
+
+
+for path in sys.argv[1:]:
+    process(path)