[llvm] aacea0d - [utils] Add script to generate elaborated IR and assembly tests (#89026)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 8 23:58:59 PDT 2024
Author: Fangrui Song
Date: 2024-05-08T23:58:55-07:00
New Revision: aacea0d0f67401f5a0b74947f3ff179ade9cbf6d
URL: https://github.com/llvm/llvm-project/commit/aacea0d0f67401f5a0b74947f3ff179ade9cbf6d
DIFF: https://github.com/llvm/llvm-project/commit/aacea0d0f67401f5a0b74947f3ff179ade9cbf6d.diff
LOG: [utils] Add script to generate elaborated IR and assembly tests (#89026)
Generally, IR and assembly test files benefit from being cleaned to
remove unnecessary details. However, for tests requiring elaborate
IR or assembly files where cleanup is less practical (e.g., large amount
of debug information output from Clang), the current practice is to
include the C/C++ source file and the generation instructions as
comments.
This is inconvenient when regeneration is needed. This patch adds
`llvm/utils/update_test_body.py` to allow easier regeneration.
`ld.lld --debug-names` tests (#86508) utilize this script for
Clang-generated assembly tests.
Note: `-o pipefail` is standard (since
https://www.austingroupbugs.net/view.php?id=789) but not supported by
dash.
Link:
https://discourse.llvm.org/t/utility-to-generate-elaborated-assembly-ir-tests/78408
Added:
llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
llvm/utils/update_test_body.py
Modified:
llvm/docs/TestingGuide.rst
llvm/test/tools/UpdateTestChecks/lit.local.cfg
llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
Removed:
################################################################################
diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index e32e4d1e535ab..e24feb3bf5fa2 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -433,6 +433,87 @@ actually participate in the test besides holding the ``RUN:`` lines.
putting the extra files in an ``Inputs/`` directory. This pattern is
deprecated.
+Elaborated tests
+----------------
+
+Generally, IR and assembly test files benefit from being cleaned to remove
+unnecessary details. However, for tests requiring elaborate IR or assembly
+files where cleanup is less practical (e.g., large amount of debug information
+output from Clang), you can include generation instructions within
+``split-file`` part called ``gen``. Then, run
+``llvm/utils/update_test_body.py`` on the test file to generate the needed
+content.
+
+.. code-block:: none
+
+ ; RUN: rm -rf %t && split-file %s %t && cd %t
+ ; RUN: opt -S a.ll ... | FileCheck %s
+
+ ; CHECK: hello
+
+ ;--- a.cc
+ int va;
+ ;--- gen
+ clang --target=x86_64-linux -S -emit-llvm -g a.cc -o -
+
+ ;--- a.ll
+ # content generated by the script 'gen'
+
+.. code-block:: bash
+
+ PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll
+
+The script will prepare extra files with ``split-file``, invoke ``gen``, and
+then rewrite the part after ``gen`` with its stdout.
+
+For convenience, if the test needs one single assembly file, you can also wrap
+``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you can
+skip ``split-file`` in RUN lines.
+
+.. code-block:: none
+
+ # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o
+ # RUN: ... | FileCheck %s
+
+ # CHECK: hello
+
+ .ifdef GEN
+ #--- a.cc
+ int va;
+ #--- gen
+ clang --target=x86_64-linux -S -g a.cc -o -
+ .endif
+ # content generated by the script 'gen'
+
+.. note::
+
+ Consider specifying an explicit target triple to avoid differences when
+ regeneration is needed on another machine.
+
+ ``gen`` is invoked with ``PWD`` set to ``/proc/self/cwd``. Clang commands
+ don't need ``-fdebug-compilation-dir=`` since its default value is ``PWD``.
+
+ Check prefixes should be placed before ``.endif`` since the part after
+ ``.endif`` is replaced.
+
+If the test body contains multiple files, you can print ``---`` separators and
+utilize ``split-file`` in ``RUN`` lines.
+
+.. code-block:: none
+
+ # RUN: rm -rf %t && split-file %s %t && cd %t
+ ...
+
+ #--- a.cc
+ int va;
+ #--- b.cc
+ int vb;
+ #--- gen
+ clang --target=x86_64-linux -S -O1 -g a.cc -o -
+ echo '#--- b.s'
+ clang --target=x86_64-linux -S -O1 -g b.cc -o -
+ #--- a.s
+
Fragile tests
-------------
diff --git a/llvm/test/tools/UpdateTestChecks/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
index f8ab6b82cde70..2e695490b005e 100644
--- a/llvm/test/tools/UpdateTestChecks/lit.local.cfg
+++ b/llvm/test/tools/UpdateTestChecks/lit.local.cfg
@@ -19,7 +19,8 @@ def add_update_script_substition(
# Specify an explicit default version in UTC tests, so that the --version
# embedded in UTC_ARGS does not change in all test expectations every time
# the default is bumped.
- extra_args += " --version=1"
+ if name != "%update_test_body":
+ extra_args += " --version=1"
config.substitutions.append(
(name, "'%s' %s %s" % (python_exe, script_path, extra_args))
)
@@ -47,3 +48,7 @@ if os.path.isfile(llvm_mca_path):
config.available_features.add("llvm-mca-binary")
mca_arg = "--llvm-mca-binary " + shell_quote(llvm_mca_path)
add_update_script_substition("%update_test_checks", extra_args=mca_arg)
+
+split_file_path = os.path.join(config.llvm_tools_dir, "split-file")
+if os.path.isfile(split_file_path):
+ add_update_script_substition("%update_test_body")
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
new file mode 100644
index 0000000000000..05024d8799cde
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic-asm.test.expected
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
+.long 0
+.long 1
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
new file mode 100644
index 0000000000000..80a2676d0a752
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/Inputs/basic.test.expected
@@ -0,0 +1,16 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
+@a = global i32 0
+;--- b.ll
+@b = global i32 0
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
new file mode 100644
index 0000000000000..3e82a3ffab9ac
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/basic-asm.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+# RUN: diff -u %S/Inputs/basic-asm.test.expected %t
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+cat a.txt b.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
new file mode 100644
index 0000000000000..d99946e2bd92c
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/basic.test
@@ -0,0 +1,13 @@
+; RUN: cp %s %t && %update_test_body %t 2>&1 | count 0
+; RUN: diff -u %S/Inputs/basic.test.expected %t
+
+;--- a.txt
+@a = global i32 0
+;--- b.txt
+@b = global i32 0
+;--- gen
+cat a.txt
+echo ';--- b.ll'
+cat b.txt
+
+;--- a.ll
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
new file mode 100644
index 0000000000000..9ea9c7bc7ac90
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/empty-stdout.test
@@ -0,0 +1,13 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+# RUN: diff -u %t %s
+
+# CHECK: stdout is empty; forgot -o - ?
+
+.ifdef GEN
+#--- a.txt
+.long 0
+#--- b.txt
+.long 1
+#--- gen
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
new file mode 100644
index 0000000000000..c12f22adceb2e
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-absent.test
@@ -0,0 +1,7 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' does not exist
+
+.ifdef GEN
+#--- a.txt
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
new file mode 100644
index 0000000000000..7e1a9365df14b
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-fail.test
@@ -0,0 +1,11 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: log
+# CHECK-NEXT: 'gen' failed
+
+.ifdef GEN
+#--- gen
+echo log >&2
+false # gen fails due to sh -e
+true
+.endif
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
new file mode 100644
index 0000000000000..c0026939e414d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/gen-unterminated.test
@@ -0,0 +1,8 @@
+# RUN: cp %s %t && not %update_test_body %t 2>&1 | FileCheck %s
+
+# CHECK: 'gen' should be followed by another part (---) or .endif
+
+#--- a.txt
+.long 0
+#--- gen
+cat a.txt
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
new file mode 100644
index 0000000000000..1bb2464ad957c
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_test_body/lit.local.cfg
@@ -0,0 +1,4 @@
+import platform
+
+if platform.system() == "Windows":
+ config.unsupported = True
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
index d0f8857c638f8..5b3cdfc977902 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/formclass4.s
@@ -1,15 +1,3 @@
-# Source:
-# struct e {
-# enum {} f[16384];
-# short g;
-# };
-# e foo() {
-# auto E = new e;
-# return *E;
-# }
-# Compile with:
-# clang -O2 -gdwarf-4 -S a.cpp -o a4.s
-
# RUN: llvm-mc %s -filetype obj -triple x86_64-apple-darwin -o %t.o
# RUN: llvm-dwarfdump -debug-info -name g %t.o | FileCheck %s
@@ -17,6 +5,20 @@
# CHECK: DW_AT_name ("g")
# CHECK: DW_AT_data_member_location (0x4000)
+.ifdef GEN
+#--- a.cpp
+struct e {
+ enum {} f[16384];
+ short g;
+};
+e foo() {
+ auto E = new e;
+ return *E;
+}
+#--- gen
+clang --target=x86_64-apple-macosx -O2 -gdwarf-4 -S a.cpp -o -
+.endif
+
.section __TEXT,__text,regular,pure_instructions
.macosx_version_min 10, 14
.globl __Z3foov ## -- Begin function _Z3foov
diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
index e8bb951750873..81d15cd2be224 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
+++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_type_units_split_v5.s
@@ -1,16 +1,16 @@
# RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \
# RUN: | llvm-dwarfdump - | FileCheck %s
-# Generated from:
-#
-# struct t1 { };
-# t1 v1;
-#
-# $ clang++ -S -g -fdebug-types-section -gsplit-dwarf -o test.5.split.s -gdwarf-5 -g
-
# CHECK: DW_TAG_variable
# CHECK: DW_AT_type ({{.*}} "t1")
+.ifdef GEN
+#--- test.cpp
+struct t1 { };
+t1 v1;
+#--- gen
+clang++ --target=x86_64-linux -S -g -fdebug-types-section -gsplit-dwarf -gdwarf-5 test.cpp -o -
+.endif
.text
.file "test.cpp"
.section .debug_types.dwo,"e",@progbits
diff --git a/llvm/utils/update_test_body.py b/llvm/utils/update_test_body.py
new file mode 100755
index 0000000000000..661b0270d783b
--- /dev/null
+++ b/llvm/utils/update_test_body.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""Generate test body using split-file and a custom script.
+
+The script will prepare extra files with `split-file`, invoke `gen`, and then
+rewrite the part after `gen` with its stdout.
+
+https://llvm.org/docs/TestingGuide.html#elaborated-tests
+
+Example:
+PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.s
+"""
+import argparse
+import contextlib
+import os
+import re
+import subprocess
+import sys
+import tempfile
+
+
+@contextlib.contextmanager
+def cd(directory):
+ cwd = os.getcwd()
+ os.chdir(directory)
+ try:
+ yield
+ finally:
+ os.chdir(cwd)
+
+
+def process(args, path):
+ prolog = []
+ seen_gen = False
+ with open(path) as f:
+ for line in f.readlines():
+ line = line.rstrip()
+ prolog.append(line)
+ if (seen_gen and re.match(r"(.|//)---", line)) or line.startswith(".endif"):
+ break
+ if re.match(r"(.|//)--- gen", line):
+ seen_gen = True
+ else:
+ print(
+ "'gen' should be followed by another part (---) or .endif",
+ file=sys.stderr,
+ )
+ return 1
+
+ if not seen_gen:
+ print("'gen' does not exist", file=sys.stderr)
+ return 1
+ with tempfile.TemporaryDirectory(prefix="update_test_body_") as dir:
+ try:
+ # If the last line starts with ".endif", remove it.
+ sub = subprocess.run(
+ ["split-file", "-", dir],
+ input="\n".join(
+ prolog[:-1] if prolog[-1].startswith(".endif") else prolog
+ ).encode(),
+ capture_output=True,
+ check=True,
+ )
+ except subprocess.CalledProcessError as ex:
+ sys.stderr.write(ex.stderr.decode())
+ return 1
+ with cd(dir):
+ if args.shell:
+ print(f"invoke shell in the temporary directory '{dir}'")
+ subprocess.run([os.environ.get("SHELL", "sh")])
+ return 0
+
+ sub = subprocess.run(
+ ["sh", "-eu", "gen"],
+ capture_output=True,
+ # Don't encode the directory information to the Clang output.
+ # Remove unneeded details (.ident) as well.
+ env=dict(
+ os.environ,
+ CCC_OVERRIDE_OPTIONS="#^-fno-ident",
+ PWD="/proc/self/cwd",
+ ),
+ )
+ sys.stderr.write(sub.stderr.decode())
+ if sub.returncode != 0:
+ print("'gen' failed", file=sys.stderr)
+ return sub.returncode
+ if not sub.stdout:
+ print("stdout is empty; forgot -o - ?", file=sys.stderr)
+ return 1
+ content = sub.stdout.decode()
+
+ with open(path, "w") as f:
+ # Print lines up to '.endif'.
+ print("\n".join(prolog), file=f)
+ # Then print the stdout of 'gen'.
+ f.write(content)
+
+
+parser = argparse.ArgumentParser(
+ description="Generate test body using split-file and a custom script"
+)
+parser.add_argument("files", nargs="+")
+parser.add_argument(
+ "--shell", action="store_true", help="invoke shell instead of 'gen'"
+)
+args = parser.parse_args()
+for path in args.files:
+ retcode = process(args, path)
+ if retcode != 0:
+ sys.exit(retcode)
More information about the llvm-commits
mailing list