[libc-commits] [libc] [libc][test] Add timeout with diagnostics to LibcTest (PR #202307)
Jeff Bailey via libc-commits
libc-commits at lists.llvm.org
Mon Jun 8 03:07:19 PDT 2026
https://github.com/kaladron created https://github.com/llvm/llvm-project/pull/202307
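Implemented a 30-second timeout mechanism in the custom LibcTest runner. If a test is still running when the threshold expires, the runner gathers diagnostics from the hung process (wchan, stack, backtrace, strace), terminates it, and reports the findings.
The threshold is given in seconds and is configurable via the 'libc_diag_threshold' lit parameter (for example, `--param libc_diag_threshold=60`).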
Assisted-by: Automated tooling, human reviewed.
From 6bc934ed56fc79e61b699ab65557d70b1374b5dc Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Mon, 8 Jun 2026 11:03:46 +0100
Subject: [PATCH] [libc][test] Add timeout with diagnostics to LibcTest
Implemented a 30-second timeout mechanism in the custom LibcTest runner.
If a test hangs, it gathers diagnostics (wchan, stack, backtrace,
strace), terminates the process, and reports the findings.
The threshold is configurable via the 'libc_diag_threshold' parameter.
Assisted-by: Automated tooling, human reviewed.
---
libc/utils/libctest/format.py | 135 ++++++++++++++++++++++++++++++++--
1 file changed, 127 insertions(+), 8 deletions(-)
diff --git a/libc/utils/libctest/format.py b/libc/utils/libctest/format.py
index ab82bf36b8eab..c2e2a496b3465 100644
--- a/libc/utils/libctest/format.py
+++ b/libc/utils/libctest/format.py
@@ -24,6 +24,7 @@
import os
import shlex
+import subprocess
import sys
import lit.formats
@@ -119,6 +120,61 @@ def _getParamsPath(self, test_path):
return None
+ def _run_diagnostics(self, pid):
+     """
+     Gather best-effort diagnostics from a running (presumably hung) process.
+
+     The report contains, in order: the contents of /proc/<pid>/wchan and
+     /proc/<pid>/stack, a backtrace of all threads taken with lldb (gdb is
+     tried only when lldb could not produce one), and two seconds of strace
+     output. Every step is optional: a failure is written into the report
+     instead of being raised, so a missing tool or insufficient permissions
+     cannot hide the timeout that triggered the diagnostics.
+
+     Args:
+         pid: ID of the process to inspect.
+
+     Returns:
+         The report as a single newline-joined string.
+     """
+
+     def capture(tool, cmd):
+         """Run a debugger command; return (succeeded, text for the report)."""
+         try:
+             # errors="replace" keeps undecodable tool output from raising.
+             text = subprocess.check_output(
+                 cmd,
+                 stderr=subprocess.STDOUT,
+                 text=True,
+                 errors="replace",
+                 timeout=10,
+             )
+             return True, text
+         except subprocess.CalledProcessError as e:
+             # Keep the tool's own output: it usually explains the failure
+             # (for example, ptrace attach not permitted).
+             return False, f"Failed to run {tool}: {e}\n{e.output or ''}"
+         except (OSError, subprocess.SubprocessError) as e:
+             # Tool not installed, or it exceeded the 10 second limit.
+             return False, f"Failed to run {tool}: {e}"
+
+     diag_out = [f"--- Process {pid} diagnostics ---"]
+
+     # wchan
+     diag_out.append("--- wchan ---")
+     try:
+         with open(f"/proc/{pid}/wchan", "r") as f:
+             diag_out.append(f.read().strip())
+     except Exception as e:
+         # Deliberately broad: diagnostics must never raise.
+         diag_out.append(f"Failed to read wchan: {e}")
+
+     # stack
+     diag_out.append("--- stack ---")
+     try:
+         with open(f"/proc/{pid}/stack", "r") as f:
+             diag_out.append(f.read())
+     except Exception as e:
+         # Deliberately broad: diagnostics must never raise.
+         diag_out.append(f"Failed to read stack: {e}")
+
+     # lldb
+     diag_out.append("--- LLDB Backtrace ---")
+     lldb_cmd = ["lldb", "-p", str(pid), "--batch", "-o", "thread backtrace all", "-o", "quit"]
+     lldb_ok, text = capture("lldb", lldb_cmd)
+     diag_out.append(text)
+
+     # gdb fallback: only attach a second debugger when lldb gave no backtrace.
+     if not lldb_ok:
+         diag_out.append("--- GDB Backtrace ---")
+         gdb_cmd = ["gdb", "-p", str(pid), "--batch", "-ex", "thread apply all bt", "-ex", "quit"]
+         _, text = capture("gdb", gdb_cmd)
+         diag_out.append(text)
+
+     # strace
+     diag_out.append("--- Strace (2 seconds) ---")
+     strace_cmd = ["timeout", "2", "strace", "-p", str(pid)]
+     try:
+         # The Python-level timeout is only a backstop in case timeout(1)
+         # itself fails to stop strace.
+         out = subprocess.check_output(
+             strace_cmd,
+             stderr=subprocess.STDOUT,
+             text=True,
+             errors="replace",
+             timeout=10,
+         )
+         diag_out.append(out)
+     except subprocess.CalledProcessError as e:
+         if e.returncode == 124:
+             # Exit status 124 from GNU timeout(1) means the command was
+             # stopped at the time limit, which is the expected outcome here.
+             diag_out.append(e.output or "")
+         else:
+             diag_out.append(f"Strace failed with exit code {e.returncode}: {e.output}")
+     except (OSError, subprocess.SubprocessError) as e:
+         diag_out.append(f"Failed to run strace: {e}")
+
+     return "\n".join(diag_out)
+
def execute(self, test, litConfig):
"""
Execute a test by running the test executable.
@@ -169,6 +225,16 @@ def execute(self, test, litConfig):
env.update(extra_env)
timeout = test.config.maxIndividualTestTime
+ diag_threshold_str = litConfig.params.get("libc_diag_threshold", "30.0")
+ try:
+ diag_threshold = float(diag_threshold_str)
+ except ValueError:
+ diag_threshold = 30.0
+
+ # Determine watch timeout
+ watch_timeout = diag_threshold
+ if timeout and timeout < diag_threshold:
+ watch_timeout = timeout
test_cmd_template = getattr(test.config, "libc_test_cmd", "")
if test_cmd_template:
@@ -195,15 +261,68 @@ def execute(self, test, litConfig):
else:
cmd_args = [test_path] + test_args
+ hit_timeout = False
+ hit_diag = False
+ out = b""
+ err = b""
+
+ p = subprocess.Popen(
+ cmd_args,
+ cwd=exec_dir,
+ env=env,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+
try:
- out, err, exit_code = lit.util.executeCommand(
- cmd_args, cwd=exec_dir, env=env, timeout=timeout
- )
- except lit.util.ExecuteCommandTimeoutException as e:
- return (
- lit.Test.TIMEOUT,
- f"{e.out}\n--\n" f"Reached timeout of {timeout} seconds",
- )
+ stdout_data, stderr_data = p.communicate(timeout=watch_timeout)
+ out += stdout_data if stdout_data else b""
+ err += stderr_data if stderr_data else b""
+ exit_code = p.wait()
+ except subprocess.TimeoutExpired as e:
+ out += e.stdout if e.stdout else b""
+ err += e.stderr if e.stderr else b""
+
+ if watch_timeout == diag_threshold:
+ hit_diag = True
+ diag_info = self._run_diagnostics(p.pid)
+
+ lit.util.killProcessAndChildren(p.pid)
+ try:
+ stdout_data, stderr_data = p.communicate(timeout=5)
+ out += stdout_data if stdout_data else b""
+ err += stderr_data if stderr_data else b""
+ except subprocess.TimeoutExpired as e2:
+ p.kill()
+ out += e2.stdout if e2.stdout else b""
+ err += e2.stderr if e2.stderr else b""
+ p.communicate()
+
+ exit_code = p.wait()
+ hit_timeout = True
+ else:
+ lit.util.killProcessAndChildren(p.pid)
+ try:
+ stdout_data, stderr_data = p.communicate(timeout=5)
+ out += stdout_data if stdout_data else b""
+ err += stderr_data if stderr_data else b""
+ except subprocess.TimeoutExpired as e2:
+ p.kill()
+ out += e2.stdout if e2.stdout else b""
+ err += e2.stderr if e2.stderr else b""
+ p.communicate()
+ exit_code = p.wait()
+ hit_timeout = True
+
+ out = out.decode("utf-8", errors="replace")
+ err = err.decode("utf-8", errors="replace")
+
+ if hit_timeout:
+ report = f"Reached timeout of {watch_timeout} seconds.\n"
+ if hit_diag:
+ report += f"\nDiagnostics gathered:\n{diag_info}\n"
+ return lit.Test.TIMEOUT, report + out + err
if not exit_code:
return lit.Test.PASS, ""
More information about the libc-commits
mailing list