[llvm] 009b9f4 - [lit] Fix lit hang on pool join when exception is thrown (#131881)

via llvm-commits llvm-commits at lists.llvm.org
Tue May 6 10:05:06 PDT 2025


Author: David Garcia Orozco
Date: 2025-05-06T17:05:03Z
New Revision: 009b9f4fb8f029f98767b5cb2c1f939119953503

URL: https://github.com/llvm/llvm-project/commit/009b9f4fb8f029f98767b5cb2c1f939119953503
DIFF: https://github.com/llvm/llvm-project/commit/009b9f4fb8f029f98767b5cb2c1f939119953503.diff

LOG: [lit] Fix lit hang on pool join when exception is thrown (#131881)

Fixes #133914

When using the internal shell with a timeout set, lit will hang on the
following call if an exception is thrown and not immediately caught:
https://github.com/llvm/llvm-project/blob/19970535f92c0f2dcda01b7fc60f95945166e424/llvm/utils/lit/lit/run.py#L93

This can occur when using the internal lit shell and trying to run a
program that does not exist. In this case `_executeShCmd` will throw an
internal shell error, which is not caught by the function directly
calling it, `executeShCmd`; rather, it is caught one function higher in
the call stack, in `executeScriptInternal`. Because that exception
percolates up the call stack instead of being immediately caught, lit
will hang until the test timeout expires. This patch moves the
handling of this exception into `executeShCmd` to avoid this.

For more background on what causes this hang see:

https://stackoverflow.com/questions/15314189/python-multiprocessing-pool-hangs-at-join

Added: 
    llvm/utils/lit/tests/Inputs/timeout-hang/lit.cfg
    llvm/utils/lit/tests/Inputs/timeout-hang/run-nonexistent.txt
    llvm/utils/lit/tests/timeout-hang.py

Modified: 
    llvm/utils/lit/lit/TestRunner.py

Removed: 
    


################################################################################
diff  --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index dab83cbc20624..16e9c7fbf45c5 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -201,7 +201,14 @@ def executeShCmd(cmd, shenv, results, timeout=0):
     timeoutHelper = TimeoutHelper(timeout)
     if timeout > 0:
         timeoutHelper.startTimer()
-    finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
+    try:
+        finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
+    except InternalShellError:
+        e = sys.exc_info()[1]
+        finalExitCode = 127
+        results.append(
+            ShellCommandResult(e.command, "", e.message, finalExitCode, False)
+        )
     timeoutHelper.cancel()
     timeoutInfo = None
     if timeoutHelper.timeoutReached():
@@ -1105,15 +1112,10 @@ def executeScriptInternal(
 
     results = []
     timeoutInfo = None
-    try:
-        shenv = ShellEnvironment(cwd, test.config.environment)
-        exitCode, timeoutInfo = executeShCmd(
-            cmd, shenv, results, timeout=litConfig.maxIndividualTestTime
-        )
-    except InternalShellError:
-        e = sys.exc_info()[1]
-        exitCode = 127
-        results.append(ShellCommandResult(e.command, "", e.message, exitCode, False))
+    shenv = ShellEnvironment(cwd, test.config.environment)
+    exitCode, timeoutInfo = executeShCmd(
+        cmd, shenv, results, timeout=litConfig.maxIndividualTestTime
+    )
 
     out = err = ""
     for i, result in enumerate(results):

diff  --git a/llvm/utils/lit/tests/Inputs/timeout-hang/lit.cfg b/llvm/utils/lit/tests/Inputs/timeout-hang/lit.cfg
new file mode 100644
index 0000000000000..1019d94898b6d
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/timeout-hang/lit.cfg
@@ -0,0 +1,7 @@
+import lit.formats
+
+config.name = "timeout-hang"
+config.suffixes = [".txt"]
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None

diff  --git a/llvm/utils/lit/tests/Inputs/timeout-hang/run-nonexistent.txt b/llvm/utils/lit/tests/Inputs/timeout-hang/run-nonexistent.txt
new file mode 100644
index 0000000000000..fd7ed210baee0
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/timeout-hang/run-nonexistent.txt
@@ -0,0 +1 @@
+RUN: nonexistent

diff  --git a/llvm/utils/lit/tests/timeout-hang.py b/llvm/utils/lit/tests/timeout-hang.py
new file mode 100644
index 0000000000000..486f07983708f
--- /dev/null
+++ b/llvm/utils/lit/tests/timeout-hang.py
@@ -0,0 +1,27 @@
+# REQUIRES: lit-max-individual-test-time
+
+# Python has some issues dealing with exceptions when multiprocessing,
+# which can cause hangs. Previously this could occur when we encountered
+# an internal shell exception, and had a timeout set.
+
+# This test runs a lit test that tries to launch a non-existent file,
+# throwing an exception. We expect this to fail immediately, rather than
+# timeout.
+
+# DEFINE: %{timeout}=1
+
+# RUN: not %{lit} %{inputs}/timeout-hang/run-nonexistent.txt \
+# RUN: --timeout=%{timeout} --param external=0 | %{python} %s %{timeout}
+
+import sys
+import re
+
+timeout_time = float(sys.argv[1])
+testing_time = float(re.search(r"Testing Time: (.*)s", sys.stdin.read()).group(1))
+
+if testing_time < timeout_time:
+    print("Testing took less than timeout")
+    sys.exit(0)
+else:
+    print("Testing took as long or longer than timeout")
+    sys.exit(1)


        


More information about the llvm-commits mailing list