[Lldb-commits] [lldb] r254430 - Revert "Fix race during process interruption"

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Tue Dec 1 09:59:56 PST 2015


Author: labath
Date: Tue Dec  1 11:59:56 2015
New Revision: 254430

URL: http://llvm.org/viewvc/llvm-project?rev=254430&view=rev
Log:
Revert "Fix race during process interruption"

The android buildbot gets quite flaky after this change. I'm reverting it while I investigate.

Modified:
    lldb/trunk/include/lldb/Target/Process.h
    lldb/trunk/packages/Python/lldbsuite/test/expression_command/timeout/TestCallWithTimeout.py
    lldb/trunk/packages/Python/lldbsuite/test/functionalities/attach_resume/TestAttachResume.py
    lldb/trunk/source/Target/Process.cpp

Modified: lldb/trunk/include/lldb/Target/Process.h
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/include/lldb/Target/Process.h?rev=254430&r1=254429&r2=254430&view=diff
==============================================================================
--- lldb/trunk/include/lldb/Target/Process.h (original)
+++ lldb/trunk/include/lldb/Target/Process.h Tue Dec  1 11:59:56 2015
@@ -1355,7 +1355,6 @@ public:
 
     Error
     ResumeSynchronous (Stream *stream);
-
     //------------------------------------------------------------------
     /// Halts a running process.
     ///
@@ -1368,15 +1367,12 @@ public:
     /// @param[in] clear_thread_plans
     ///     If true, when the process stops, clear all thread plans.
     ///
-    /// @param[in] use_run_lock
-    ///     Whether to release the run lock after the stop.
-    ///
     /// @return
     ///     Returns an error object.  If the error is empty, the process is halted.
     ///     otherwise the halt has failed.
     //------------------------------------------------------------------
     Error
-    Halt (bool clear_thread_plans = false, bool use_run_lock = true);
+    Halt (bool clear_thread_plans = false);
 
     //------------------------------------------------------------------
     /// Detaches from a running or stopped process.
@@ -1687,8 +1683,9 @@ public:
     /// DoHalt must produce one and only one stop StateChanged event if it actually
     /// stops the process.  If the stop happens through some natural event (for
     /// instance a SIGSTOP), then forwarding that event will do.  Otherwise, you must 
-    /// generate the event manually. This function is called from the context of the
-    /// private state thread.
+    /// generate the event manually.  Note also, the private event thread is stopped when 
+    /// DoHalt is run to prevent the events generated while halting to trigger
+    /// other state changes before the halt is complete.
     ///
     /// @param[out] caused_stop
     ///     If true, then this Halt caused the stop, otherwise, the 
@@ -2864,16 +2861,12 @@ public:
     // Returns the process state when it is stopped. If specified, event_sp_ptr
     // is set to the event which triggered the stop. If wait_always = false,
     // and the process is already stopped, this function returns immediately.
-    // If the process is hijacked and use_run_lock is true (the default), then this
-    // function releases the run lock after the stop. Setting use_run_lock to false
-    // will avoid this behavior.
     lldb::StateType
     WaitForProcessToStop(const TimeValue *timeout,
                          lldb::EventSP *event_sp_ptr = nullptr,
                          bool wait_always = true,
                          Listener *hijack_listener = nullptr,
-                         Stream *stream = nullptr,
-                         bool use_run_lock = true);
+                         Stream *stream = nullptr);
 
     uint32_t
     GetIOHandlerID () const
@@ -3288,6 +3281,12 @@ protected:
         std::string m_exit_string;
     };
 
+    bool 
+    HijackPrivateProcessEvents (Listener *listener);
+    
+    void 
+    RestorePrivateProcessEvents ();
+    
     bool
     PrivateStateThreadIsValid () const
     {
@@ -3373,6 +3372,7 @@ protected:
     std::vector<PreResumeCallbackAndBaton> m_pre_resume_actions;
     ProcessRunLock              m_public_run_lock;
     ProcessRunLock              m_private_run_lock;
+    Predicate<bool>             m_currently_handling_event; // This predicate is set in HandlePrivateEvent while all its business is being done.
     ArchSpec::StopInfoOverrideCallbackType m_stop_info_override_callback;
     bool                        m_currently_handling_do_on_removals;
     bool                        m_resume_requested;         // If m_currently_handling_event or m_currently_handling_do_on_removals are true, Resume will only request a resume, using this flag to check.
@@ -3436,9 +3436,6 @@ protected:
     void
     HandlePrivateEvent (lldb::EventSP &event_sp);
 
-    Error
-    HaltPrivate();
-
     lldb::StateType
     WaitForProcessStopPrivate (const TimeValue *timeout, lldb::EventSP &event_sp);
 

Modified: lldb/trunk/packages/Python/lldbsuite/test/expression_command/timeout/TestCallWithTimeout.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/expression_command/timeout/TestCallWithTimeout.py?rev=254430&r1=254429&r2=254430&view=diff
==============================================================================
--- lldb/trunk/packages/Python/lldbsuite/test/expression_command/timeout/TestCallWithTimeout.py (original)
+++ lldb/trunk/packages/Python/lldbsuite/test/expression_command/timeout/TestCallWithTimeout.py Tue Dec  1 11:59:56 2015
@@ -57,7 +57,7 @@ class ExprCommandWithTimeoutsTestCase(Te
 
         frame = thread.GetFrameAtIndex(0)
         
-        value = frame.EvaluateExpression ("wait_a_while (200000)", options)
+        value = frame.EvaluateExpression ("wait_a_while (50000)", options)
         self.assertTrue (value.IsValid())
         self.assertFalse (value.GetError().Success())
 
@@ -65,7 +65,7 @@ class ExprCommandWithTimeoutsTestCase(Te
         interp = self.dbg.GetCommandInterpreter()
 
         result = lldb.SBCommandReturnObject()
-        return_value = interp.HandleCommand ("expr -t 100 -u true -- wait_a_while(200000)", result)
+        return_value = interp.HandleCommand ("expr -t 100 -u true -- wait_a_while(50000)", result)
         self.assertTrue (return_value == lldb.eReturnStatusFailed)
 
         # Okay, now do it again with long enough time outs:

Modified: lldb/trunk/packages/Python/lldbsuite/test/functionalities/attach_resume/TestAttachResume.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/functionalities/attach_resume/TestAttachResume.py?rev=254430&r1=254429&r2=254430&view=diff
==============================================================================
--- lldb/trunk/packages/Python/lldbsuite/test/functionalities/attach_resume/TestAttachResume.py (original)
+++ lldb/trunk/packages/Python/lldbsuite/test/functionalities/attach_resume/TestAttachResume.py Tue Dec  1 11:59:56 2015
@@ -20,6 +20,7 @@ class AttachResumeTestCase(TestBase):
     @skipIfRemote
     @expectedFailureFreeBSD('llvm.org/pr19310')
     @expectedFailureWindows("llvm.org/pr24778")
+    @expectedFlakeyLinux('llvm.org/pr19310')
     def test_attach_continue_interrupt_detach(self):
         """Test attach/continue/interrupt/detach"""
         self.build()
@@ -51,9 +52,6 @@ class AttachResumeTestCase(TestBase):
         self.runCmd("process interrupt")
         lldbutil.expect_state_changes(self, listener, [lldb.eStateStopped])
 
-        # Second interrupt should have no effect.
-        self.expect("process interrupt", patterns=["Process is not running"], error=True)
-
         # check that this breakpoint is auto-cleared on detach (r204752)
         self.runCmd("br set -f main.cpp -l %u" % (line_number('main.cpp', '// Set breakpoint here')))
 

Modified: lldb/trunk/source/Target/Process.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/source/Target/Process.cpp?rev=254430&r1=254429&r2=254430&view=diff
==============================================================================
--- lldb/trunk/source/Target/Process.cpp (original)
+++ lldb/trunk/source/Target/Process.cpp Tue Dec  1 11:59:56 2015
@@ -759,6 +759,7 @@ Process::Process(lldb::TargetSP target_s
     m_next_event_action_ap(),
     m_public_run_lock (),
     m_private_run_lock (),
+    m_currently_handling_event(false),
     m_stop_info_override_callback (NULL),
     m_finalizing (false),
     m_finalize_called (false),
@@ -991,8 +992,7 @@ Process::WaitForProcessToStop (const Tim
                                EventSP *event_sp_ptr,
                                bool wait_always,
                                Listener *hijack_listener,
-                               Stream *stream,
-                               bool use_run_lock)
+                               Stream *stream)
 {
     // We can't just wait for a "stopped" event, because the stopped event may have restarted the target.
     // We have to actually check each event, and in the case of a stopped event check the restarted flag
@@ -1019,7 +1019,7 @@ Process::WaitForProcessToStop (const Tim
                         __FUNCTION__);
         // We need to toggle the run lock as this won't get done in
         // SetPublicState() if the process is hijacked.
-        if (hijack_listener && use_run_lock)
+        if (hijack_listener)
             m_public_run_lock.SetStopped();
         return state;
     }
@@ -1042,7 +1042,7 @@ Process::WaitForProcessToStop (const Tim
         case eStateUnloaded:
             // We need to toggle the run lock as this won't get done in
             // SetPublicState() if the process is hijacked.
-            if (hijack_listener && use_run_lock)
+            if (hijack_listener)
                 m_public_run_lock.SetStopped();
             return state;
         case eStateStopped:
@@ -1052,7 +1052,7 @@ Process::WaitForProcessToStop (const Tim
             {
                 // We need to toggle the run lock as this won't get done in
                 // SetPublicState() if the process is hijacked.
-                if (hijack_listener && use_run_lock)
+                if (hijack_listener)
                     m_public_run_lock.SetStopped();
                 return state;
             }
@@ -1308,6 +1308,23 @@ Process::RestoreProcessEvents ()
     RestoreBroadcaster();
 }
 
+bool
+Process::HijackPrivateProcessEvents (Listener *listener)
+{
+    if (listener != NULL)
+    {
+        return m_private_state_broadcaster.HijackBroadcaster(listener, eBroadcastBitStateChanged | eBroadcastBitInterrupt);
+    }
+    else
+        return false;
+}
+
+void
+Process::RestorePrivateProcessEvents ()
+{
+    m_private_state_broadcaster.RestoreBroadcaster();
+}
+
 StateType
 Process::WaitForStateChangedEvents (const TimeValue *timeout, EventSP &event_sp, Listener *hijack_listener)
 {
@@ -3814,50 +3831,101 @@ Process::PrivateResume ()
 }
 
 Error
-Process::Halt (bool clear_thread_plans, bool use_run_lock)
+Process::Halt (bool clear_thread_plans)
 {
-    if (! StateIsRunningState(m_public_state.GetValue()))
-        return Error("Process is not running.");
-
     // Don't clear the m_clear_thread_plans_on_stop, only set it to true if
     // in case it was already set and some thread plan logic calls halt on its
     // own.
     m_clear_thread_plans_on_stop |= clear_thread_plans;
     
+    // First make sure we aren't in the middle of handling an event, or we might restart.  This is pretty weak, since
+    // we could just straightaway get another event.  It just narrows the window...
+    m_currently_handling_event.WaitForValueEqualTo(false);
+
+    
+    // Pause our private state thread so we can ensure no one else eats
+    // the stop event out from under us.
     Listener halt_listener ("lldb.process.halt_listener");
-    HijackProcessEvents(&halt_listener);
+    HijackPrivateProcessEvents(&halt_listener);
 
     EventSP event_sp;
+    Error error (WillHalt());
     
-    SendAsyncInterrupt();
-
-    if (m_public_state.GetValue() == eStateAttaching)
-    {
-        // Don't hijack and eat the eStateExited as the code that was doing
-        // the attach will be waiting for this event...
-        RestoreProcessEvents();
-        SetExitStatus(SIGKILL, "Cancelled async attach.");
-        Destroy (false);
-        return Error();
-    }
-
-    // Wait for 10 second for the process to stop.
-    TimeValue timeout_time;
-    timeout_time = TimeValue::Now();
-    timeout_time.OffsetWithSeconds(10);
-    StateType state = WaitForProcessToStop(&timeout_time, &event_sp, true, &halt_listener,
-                                           nullptr, use_run_lock);
-    RestoreProcessEvents();
-
-    if (state == eStateInvalid || ! event_sp)
+    bool restored_process_events = false;
+    if (error.Success())
     {
-        // We timed out and didn't get a stop event...
-        return Error("Halt timed out. State = %s", StateAsCString(GetState()));
+        
+        bool caused_stop = false;
+        
+        // Ask the process subclass to actually halt our process
+        error = DoHalt(caused_stop);
+        if (error.Success())
+        {
+            if (m_public_state.GetValue() == eStateAttaching)
+            {
+                // Don't hijack and eat the eStateExited as the code that was doing
+                // the attach will be waiting for this event...
+                RestorePrivateProcessEvents();
+                restored_process_events = true;
+                SetExitStatus(SIGKILL, "Cancelled async attach.");
+                Destroy (false);
+            }
+            else
+            {
+                // If "caused_stop" is true, then DoHalt stopped the process. If
+                // "caused_stop" is false, the process was already stopped.
+                // If the DoHalt caused the process to stop, then we want to catch
+                // this event and set the interrupted bool to true before we pass
+                // this along so clients know that the process was interrupted by
+                // a halt command.
+                if (caused_stop)
+                {
+                    // Wait for 1 second for the process to stop.
+                    TimeValue timeout_time;
+                    timeout_time = TimeValue::Now();
+                    timeout_time.OffsetWithSeconds(10);
+                    bool got_event = halt_listener.WaitForEvent (&timeout_time, event_sp);
+                    StateType state = ProcessEventData::GetStateFromEvent(event_sp.get());
+                    
+                    if (!got_event || state == eStateInvalid)
+                    {
+                        // We timeout out and didn't get a stop event...
+                        error.SetErrorStringWithFormat ("Halt timed out. State = %s", StateAsCString(GetState()));
+                    }
+                    else
+                    {
+                        if (StateIsStoppedState (state, false))
+                        {
+                            // We caused the process to interrupt itself, so mark this
+                            // as such in the stop event so clients can tell an interrupted
+                            // process from a natural stop
+                            ProcessEventData::SetInterruptedInEvent (event_sp.get(), true);
+                        }
+                        else
+                        {
+                            Log *log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_PROCESS));
+                            if (log)
+                                log->Printf("Process::Halt() failed to stop, state is: %s", StateAsCString(state));
+                            error.SetErrorString ("Did not get stopped event after halt.");
+                        }
+                    }
+                }
+                DidHalt();
+            }
+        }
     }
+    // Resume our private state thread before we post the event (if any)
+    if (!restored_process_events)
+        RestorePrivateProcessEvents();
 
-    BroadcastEvent(event_sp);
+    // Post any event we might have consumed. If all goes well, we will have
+    // stopped the process, intercepted the event and set the interrupted
+    // bool in the event.  Post it to the private event queue and that will end up
+    // correctly setting the state.
+    if (event_sp)
+        m_private_state_broadcaster.BroadcastEvent(event_sp);
 
-    return Error();
+    return error;
 }
 
 Error
@@ -4393,6 +4461,8 @@ Process::HandlePrivateEvent (EventSP &ev
     Log *log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_PROCESS));
     m_resume_requested = false;
     
+    m_currently_handling_event.SetValue(true, eBroadcastNever);
+    
     const StateType new_state = Process::ProcessEventData::GetStateFromEvent(event_sp.get());
     
     // First check to see if anybody wants a shot at this event:
@@ -4419,6 +4489,7 @@ Process::HandlePrivateEvent (EventSP &ev
                 {
                     // FIXME: should cons up an exited event, and discard this one.
                     SetExitStatus(0, m_next_event_action_ap->GetExitString());
+                    m_currently_handling_event.SetValue(false, eBroadcastAlways);
                     SetNextEventAction(NULL);
                     return;
                 }
@@ -4508,22 +4579,7 @@ Process::HandlePrivateEvent (EventSP &ev
                          StateAsCString (GetState ()));
         }
     }
-}
-
-Error
-Process::HaltPrivate()
-{
-    EventSP event_sp;
-    Error error (WillHalt());
-    if (error.Fail())
-        return error;
-
-    // Ask the process subclass to actually halt our process
-    bool caused_stop;
-    error = DoHalt(caused_stop);
-
-    DidHalt();
-    return error;
+    m_currently_handling_event.SetValue(false, eBroadcastAlways);
 }
 
 thread_result_t
@@ -4546,7 +4602,6 @@ Process::RunPrivateStateThread (bool is_
                      __FUNCTION__, static_cast<void*>(this), GetID());
 
     bool exit_now = false;
-    bool interrupt_requested = false;
     while (!exit_now)
     {
         EventSP event_sp;
@@ -4592,16 +4647,7 @@ Process::RunPrivateStateThread (bool is_
                     log->Printf ("Process::%s (arg = %p, pid = %" PRIu64 ") woke up with an interrupt - Halting.",
                                  __FUNCTION__, static_cast<void*>(this),
                                  GetID());
-                Error error = HaltPrivate();
-                if (error.Fail() && log)
-                    log->Printf ("Process::%s (arg = %p, pid = %" PRIu64 ") failed to halt the process: %s",
-                                 __FUNCTION__, static_cast<void*>(this),
-                                 GetID(), error.AsCString());
-                // Halt should generate a stopped event. Make a note of the fact that we were
-                // doing the interrupt, so we can set the interrupted flag after we receive the
-                // event. We deliberately set this to true even if HaltPrivate failed, so that we
-                // can interrupt on the next natural stop.
-                interrupt_requested = true;
+                Halt();
             }
             continue;
         }
@@ -4616,23 +4662,6 @@ Process::RunPrivateStateThread (bool is_
                 m_clear_thread_plans_on_stop = false;
                 m_thread_list.DiscardThreadPlans();
             }
-
-            if (interrupt_requested)
-            {
-                if (StateIsStoppedState (internal_state, true))
-                {
-                    // We requested the interrupt, so mark this as such in the stop event so
-                    // clients can tell an interrupted process from a natural stop
-                    ProcessEventData::SetInterruptedInEvent (event_sp.get(), true);
-                    interrupt_requested = false;
-                }
-                else if (log)
-                {
-                    log->Printf("Process::%s interrupt_requested, but a non-stopped state '%s' received.",
-                            __FUNCTION__, StateAsCString(internal_state));
-                }
-            }
-
             HandlePrivateEvent (event_sp);
         }
 
@@ -5715,9 +5744,7 @@ Process::RunThreadPlan (ExecutionContext
                     {
                         // This is probably an overabundance of caution, I don't think I should ever get a stopped & restarted
                         // event here.  But if I do, the best thing is to Halt and then get out of here.
-                        const bool clear_thread_plans = false;
-                        const bool use_run_lock = false;
-                        Halt(clear_thread_plans, use_run_lock);
+                        Halt();
                     }
 
                     errors.Printf("Didn't get running event after initial resume, got %s instead.",
@@ -5811,9 +5838,7 @@ Process::RunThreadPlan (ExecutionContext
                     bool keep_going = false;
                     if (event_sp->GetType() == eBroadcastBitInterrupt)
                     {
-                        const bool clear_thread_plans = false;
-                        const bool use_run_lock = false;
-                        Halt(clear_thread_plans, use_run_lock);
+                        Halt();
                         return_value = eExpressionInterrupted;
                         errors.Printf ("Execution halted by user interrupt.");
                         if (log)
@@ -5990,9 +6015,7 @@ Process::RunThreadPlan (ExecutionContext
                     {
                         if (log)
                             log->Printf ("Process::RunThreadPlan(): Running Halt.");
-                        const bool clear_thread_plans = false;
-                        const bool use_run_lock = false;
-                        Halt(clear_thread_plans, use_run_lock);
+                        halt_error = Halt();
                     }
                     if (halt_error.Success())
                     {




More information about the lldb-commits mailing list