[Lldb-commits] [lldb] r266624 - fix a race in the LLDB test suite results collection

Todd Fiala via lldb-commits lldb-commits at lists.llvm.org
Mon Apr 18 09:09:21 PDT 2016


Author: tfiala
Date: Mon Apr 18 11:09:21 2016
New Revision: 266624

URL: http://llvm.org/viewvc/llvm-project?rev=266624&view=rev
Log:
fix a race in the LLDB test suite results collection

The race boiled down to this:

If a test worker queue is able to run the test inferior and
clean up before the dosep.py listener socket is spun up, and
the worker queue is the last one (as would be the case when
there's only one test rerunning in the rerun queue), then
the test suite will exit the main loop before having a chance
to process any test events coming from the test inferior or
the worker queue job control.

I found this race to be far more likely on fast hardware.
Our Linux CI is one such example.  While it will show
up primarily during meta test events generated by
a worker thread when a test inferior times out or
exits with an exceptional exit (e.g. seg fault), it only
requires that the OS takes longer to hook up the
listener socket than it takes for the final test inferior
and worker thread to shut down.

See:
http://reviews.llvm.org/D19214

reviewed by:
Pavel Labath

Modified:
    lldb/trunk/packages/Python/lldbsuite/test/dosep.py
    lldb/trunk/packages/Python/lldbsuite/test/dotest_channels.py
    lldb/trunk/packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park
    lldb/trunk/packages/Python/lldbsuite/test/result_formatter.py

Modified: lldb/trunk/packages/Python/lldbsuite/test/dosep.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/dosep.py?rev=266624&r1=266623&r2=266624&view=diff
==============================================================================
--- lldb/trunk/packages/Python/lldbsuite/test/dosep.py (original)
+++ lldb/trunk/packages/Python/lldbsuite/test/dosep.py Mon Apr 18 11:09:21 2016
@@ -109,13 +109,17 @@ def setup_global_variables(
         global GET_WORKER_INDEX
         GET_WORKER_INDEX = get_worker_index_use_pid
 
-def report_test_failure(name, command, output):
+def report_test_failure(name, command, output, timeout):
     global output_lock
     with output_lock:
         if not (RESULTS_FORMATTER and RESULTS_FORMATTER.is_using_terminal()):
             print(file=sys.stderr)
             print(output, file=sys.stderr)
-            print("[%s FAILED]" % name, file=sys.stderr)
+            if timeout:
+                timeout_str = " (TIMEOUT)"
+            else:
+                timeout_str = ""
+            print("[%s FAILED]%s" % (name, timeout_str), file=sys.stderr)
             print("Command invoked: %s" % ' '.join(command), file=sys.stderr)
         update_progress(name)
 
@@ -211,7 +215,7 @@ class DoTestProcessDriver(process_contro
             # only stderr does.
             report_test_pass(self.file_name, output[1])
         else:
-            report_test_failure(self.file_name, command, output[1])
+            report_test_failure(self.file_name, command, output[1], was_timeout)
 
         # Save off the results for the caller.
         self.results = (

Modified: lldb/trunk/packages/Python/lldbsuite/test/dotest_channels.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/dotest_channels.py?rev=266624&r1=266623&r2=266624&view=diff
==============================================================================
--- lldb/trunk/packages/Python/lldbsuite/test/dotest_channels.py (original)
+++ lldb/trunk/packages/Python/lldbsuite/test/dotest_channels.py Mon Apr 18 11:09:21 2016
@@ -55,6 +55,14 @@ class UnpicklingForwardingReaderChannel(
             # unpickled results.
             raise Exception("forwarding function must be set")
 
+        # Initiate all connections by sending an ack.  This allows
+        # the initiators of the socket to await this to ensure
+        # that this end is up and running (and therefore already
+        # into the async map).
+        ack_bytes = bytearray()
+        ack_bytes.append(chr(42))
+        file_object.send(ack_bytes)
+
     def deserialize_payload(self):
         """Unpickles the collected input buffer bytes and forwards."""
         if len(self.ibuffer) > 0:

Modified: lldb/trunk/packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park?rev=266624&r1=266623&r2=266624&view=diff
==============================================================================
--- lldb/trunk/packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park (original)
+++ lldb/trunk/packages/Python/lldbsuite/test/issue_verification/TestRerunTimeout.py.park Mon Apr 18 11:09:21 2016
@@ -3,19 +3,21 @@ from __future__ import print_function
 
 import time
 
-import lldbsuite.test.lldbtest as lldbtest
+import lldbsuite.test.decorators as decorators
 import rerun_base
 
 
 class RerunTimeoutTestCase(rerun_base.RerunBaseTestCase):
-    @lldbtest.no_debug_info_test
+    @decorators.no_debug_info_test
     def test_timeout_rerun_succeeds(self):
-        """Tests that timeout logic kicks in and is picked up."""
+        """Tests that the timeout logic kicks in and that this timeout is picked up."""
         if not self.should_generate_issue():
             # We pass this time.
             return
+
         # We time out this time.
         while True:
+            # noinspection PyBroadException
             try:
                 time.sleep(1)
             except:

Modified: lldb/trunk/packages/Python/lldbsuite/test/result_formatter.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/result_formatter.py?rev=266624&r1=266623&r2=266624&view=diff
==============================================================================
--- lldb/trunk/packages/Python/lldbsuite/test/result_formatter.py (original)
+++ lldb/trunk/packages/Python/lldbsuite/test/result_formatter.py Mon Apr 18 11:09:21 2016
@@ -76,6 +76,18 @@ def create_results_formatter(config):
 
         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         sock.connect(("localhost", port))
+
+        # Wait for the ack from the listener side.
+        # This is needed to prevent a race condition
+        # in the main dosep.py processing loop: we
+        # can't allow a worker queue thread to die
+        # that has outstanding messages to a listener
+        # socket before the asyncore listener socket
+        # gets spun up; otherwise, we lose the test
+        # result info.
+        read_bytes = sock.recv(1)
+        # print("\n** socket creation: received ack: {}".format(ord(read_bytes[0])), file=sys.stderr)
+
         return (sock, lambda: socket_closer(sock))
 
     default_formatter_name = None




More information about the lldb-commits mailing list