[llvm] 3300bc3 - [llvm-exegesis] Fix race condition in subprocess mode (#72778)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 20 01:10:46 PST 2023


Author: Aiden Grossman
Date: 2023-11-20T01:10:42-08:00
New Revision: 3300bc34f7bccf29c14221fa4b651f7bc82c46d5

URL: https://github.com/llvm/llvm-project/commit/3300bc34f7bccf29c14221fa4b651f7bc82c46d5
DIFF: https://github.com/llvm/llvm-project/commit/3300bc34f7bccf29c14221fa4b651f7bc82c46d5.diff

LOG: [llvm-exegesis] Fix race condition in subprocess mode (#72778)

If there were some scheduler effects where something like the parent
process got interrupted while the child process continued to run, there
would be nothing blocking it from exiting before the parent process
issued a PTRACE_ATTACH call. This would cause transient failures as this
occurred pretty rarely. This patch removes the possibility of a
transient failure by ensuring that the parent process attaches to the
child process before sending the counter file descriptor through the
socket, ensuring that the child process has at most progressed to being
blocked in the read call for the counter file descriptor.

Added: 
    

Modified: 
    llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations-livein.s
    llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s
    llvm/test/tools/llvm-exegesis/X86/latency/subprocess-abnormal-exit-code.s
    llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s
    llvm/test/tools/llvm-exegesis/X86/latency/subprocess-segfault.s
    llvm/test/tools/llvm-exegesis/X86/latency/subprocess.s
    llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations-livein.s b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations-livein.s
index 5d9171959b6afed..51b48c933efc063 100644
--- a/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations-livein.s
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations-livein.s
@@ -8,9 +8,6 @@
 # CHECK: measurements:
 # CHECK-NEXT: value: {{.*}}, per_snippet_value: {{.*}}
 
-# TODO: Sometimes transiently fails on PTRACE_ATTACH
-# ALLOW_RETRIES: 2
-
 # LLVM-EXEGESIS-MEM-DEF test1 4096 2147483647
 # LLVM-EXEGESIS-MEM-MAP test1 1048576
 # LLVM-EXEGESIS-LIVEIN R14

diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s
index 735deb40243c65a..04f79ad919d6df6 100644
--- a/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/memory-annotations.s
@@ -10,9 +10,6 @@
 # CHECK: measurements:
 # CHECK-NEXT: value: {{.*}}, per_snippet_value: {{.*}}
 
-# TODO: Sometimes transiently fails on PTRACE_ATTACH
-# ALLOW_RETRIES: 2
-
 # LLVM-EXEGESIS-MEM-DEF test1 4096 2147483647
 # LLVM-EXEGESIS-MEM-MAP test1 1048576
 

diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-abnormal-exit-code.s b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-abnormal-exit-code.s
index edd23654be67fcd..849f1a2e552f182 100644
--- a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-abnormal-exit-code.s
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-abnormal-exit-code.s
@@ -4,9 +4,6 @@
 
 # CHECK: error: 'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code'
 
-# TODO: Sometimes transiently fails on PTRACE_ATTACH
-# ALLOW_RETRIES: 2
-
 movl $60, %eax
 movl $127, %edi
 syscall

diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s
index 320d1d7db38f422..cf91efa5ce14553 100644
--- a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s
@@ -2,10 +2,6 @@
 
 # RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s
 
-# See comment in ./subprocess-abnormal-exit-code.s on the transient
-# PTRACE_ATTACH failure.
-# ALLOW_RETRIES: 2
-
 # Check that the value of the registers preserved in subprocess mode while
 # making the ioctl system call are actually preserved correctly.
 

diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-segfault.s b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-segfault.s
index bc01c607f0721ca..92b08e1c18c62ec 100644
--- a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-segfault.s
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-segfault.s
@@ -4,8 +4,5 @@
 
 # CHECK: error:           'The benchmarking subprocess sent unexpected signal: Segmentation fault'
 
-# TODO: Sometimes transiently fails on PTRACE_ATTACH
-# ALLOW_RETRIES: 2
-
 # LLVM-EXEGESIS-DEFREG RBX 0
 movq (%rbx), %rax

diff  --git a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess.s b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess.s
index d5c5a136b92f23e..272780af02409ef 100644
--- a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess.s
+++ b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess.s
@@ -5,9 +5,6 @@
 # CHECK: measurements:
 # CHECK-NEXT: value: {{.*}}, per_snippet_value: {{.*}}
 
-# TODO: Sometimes transiently fails on PTRACE_ATTACH
-# ALLOW_RETRIES: 2
-
 # LLVM-EXEGESIS-DEFREG XMM1 42
 # LLVM-EXEGESIS-DEFREG XMM2 42
 # LLVM-EXEGESIS-DEFREG XMM3 42

diff  --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
index 7ec24eb2f866f86..19d1867fac0e2b7 100644
--- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
+++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
@@ -292,13 +292,13 @@ class SubProcessFunctionExecutorImpl
 
     close(PipeFiles[0]);
 
-    int CounterFileDescriptor = Counter->getFileDescriptor();
-    Error SendError =
-        sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);
-
-    if (SendError)
-      return SendError;
-
+    // Make sure to attach to the process (and wait for the sigstop to be
+    // delivered and for the process to continue) before we write to the counter
+    // file descriptor. Attaching to the process before writing to the socket
+    // ensures that the subprocess at most has blocked on the read call. If we
+    // attach afterwards, the subprocess might exit before we get to the attach
+    // call due to effects like scheduler contention, introducing transient
+    // failures.
     if (ptrace(PTRACE_ATTACH, ParentOrChildPID, NULL, NULL) != 0)
       return make_error<Failure>("Failed to attach to the child process: " +
                                  Twine(strerror(errno)));
@@ -314,6 +314,13 @@ class SubProcessFunctionExecutorImpl
           "Failed to continue execution of the child process: " +
           Twine(strerror(errno)));
 
+    int CounterFileDescriptor = Counter->getFileDescriptor();
+    Error SendError =
+        sendFileDescriptorThroughSocket(PipeFiles[1], CounterFileDescriptor);
+
+    if (SendError)
+      return SendError;
+
     int ChildStatus;
     if (wait(&ChildStatus) == -1) {
       return make_error<Failure>(


        


More information about the llvm-commits mailing list