[libc-commits] [libc] [libc] implement cached process/thread identity (PR #95965)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Wed Jun 19 12:44:21 PDT 2024


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/95965

>From 81d97d32a743164a5a27580f60d13c5847d1b1b0 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Tue, 18 Jun 2024 11:13:09 -0700
Subject: [PATCH 1/5] [libc] implement cached process/thread identity

---
 libc/config/linux/aarch64/entrypoints.txt     |  1 +
 libc/config/linux/riscv/entrypoints.txt       |  1 +
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/docs/dev/undefined_behavior.rst          | 21 ++++++++++++
 libc/spec/posix.td                            | 15 +++-----
 libc/src/__support/OSUtil/CMakeLists.txt      |  9 +++++
 .../src/__support/OSUtil/linux/CMakeLists.txt | 13 +++++++
 libc/src/__support/OSUtil/linux/pid.cpp       | 20 +++++++++++
 libc/src/__support/OSUtil/pid.h               | 32 +++++++++++++++++
 libc/src/__support/threads/CMakeLists.txt     | 19 +++++++++++
 .../__support/threads/linux/CMakeLists.txt    |  1 +
 libc/src/__support/threads/linux/rwlock.h     |  9 +++--
 libc/src/__support/threads/linux/thread.cpp   |  2 ++
 libc/src/__support/threads/thread.h           | 13 ++++++-
 libc/src/__support/threads/tid.h              | 34 +++++++++++++++++++
 libc/src/unistd/CMakeLists.txt                | 10 ++++++
 libc/src/unistd/getpid.h                      |  4 +--
 libc/src/unistd/gettid.cpp                    | 17 ++++++++++
 libc/src/unistd/gettid.h                      | 21 ++++++++++++
 libc/src/unistd/linux/CMakeLists.txt          |  4 +--
 libc/src/unistd/linux/fork.cpp                | 28 +++++++++------
 libc/src/unistd/linux/getpid.cpp              | 10 ++----
 libc/test/src/unistd/CMakeLists.txt           | 10 ++++++
 libc/test/src/unistd/gettid_test.cpp          | 15 ++++++++
 24 files changed, 272 insertions(+), 38 deletions(-)
 create mode 100644 libc/src/__support/OSUtil/linux/pid.cpp
 create mode 100644 libc/src/__support/OSUtil/pid.h
 create mode 100644 libc/src/__support/threads/tid.h
 create mode 100644 libc/src/unistd/gettid.cpp
 create mode 100644 libc/src/unistd/gettid.h
 create mode 100644 libc/test/src/unistd/gettid_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index dfed6acbdf257..a2643bda8277f 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -290,6 +290,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.geteuid
     libc.src.unistd.getpid
     libc.src.unistd.getppid
+    libc.src.unistd.gettid
     libc.src.unistd.getuid
     libc.src.unistd.isatty
     libc.src.unistd.link
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index e12d6b3957e51..ae0c64045da88 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -295,6 +295,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.geteuid
     libc.src.unistd.getpid
     libc.src.unistd.getppid
+    libc.src.unistd.gettid
     libc.src.unistd.getuid
     libc.src.unistd.isatty
     libc.src.unistd.link
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index cfe35167ca32e..0380d4a3734e8 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -308,6 +308,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.geteuid
     libc.src.unistd.getpid
     libc.src.unistd.getppid
+    libc.src.unistd.gettid
     libc.src.unistd.getuid
     libc.src.unistd.isatty
     libc.src.unistd.link
diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst
index c97a539ca8da4..9f31aeff9ab97 100644
--- a/libc/docs/dev/undefined_behavior.rst
+++ b/libc/docs/dev/undefined_behavior.rst
@@ -89,3 +89,24 @@ The C23 standard states that if the value of the ``rnd`` argument of the
 the value of a math rounding direction macro, the direction of rounding is
 unspecified. LLVM's libc chooses to use the ``FP_INT_TONEAREST`` rounding
 direction in this case.
+
+Cached ``getpid/gettid``
+------------------------
+Since version ``2.25``, glibc removes its cache mechanism for ``getpid/gettid`` 
+(See the history section in https://man7.org/linux/man-pages/man2/getpid.2.html).
+LLVM's libc still implements the cache as it is useful for fast deadlock detection.
+The cache mechanism is also implemented in MUSL and bionic.
+
+Unwrapped ``SYS_clone/SYS_fork/SYS_vfork``
+------------------------------------------
+Using unwrapped ``SYS_clone/SYS_fork/SYS_vfork`` is strongly discouraged.
+First, calling these syscalls without the libc-provided wrappers bypasses
+all the ``pthread_atfork`` handlers, as libc can no longer detect the ``fork``.
+Second, libc relies on the ``fork/clone`` wrappers to correctly maintain the caches
+for the process id and thread id, and other important process-specific state such as
+the list of robust mutexes. Third, even if the user calls an ``exec*`` function
+immediately, other unexpected issues can still arise. For instance, signal handlers
+inherited from the parent process may be triggered in the window between ``fork``
+and ``exec*``. Because libc could not maintain its internal state correctly, functions
+called inside those signal handlers, even ones marked ``async-signal-safe`` (such as
+``getpid``), may return wrong values or lead to even worse situations.
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index d14047548e104..c3996a213f60b 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -512,6 +512,11 @@ def POSIX : StandardSpec<"POSIX"> {
           RetValSpec<PidT>,
           [ArgSpec<VoidType>]
         >,
+        FunctionSpec<
+          "gettid",
+          RetValSpec<PidT>,
+          [ArgSpec<VoidType>]
+        >,
         FunctionSpec<
           "getuid",
           RetValSpec<UidT>,
@@ -567,16 +572,6 @@ def POSIX : StandardSpec<"POSIX"> {
           RetValSpec<IntType>,
           [ArgSpec<ConstCharPtr>]
         >,
-        FunctionSpec<
-          "getpid",
-          RetValSpec<IntType>,
-          [ArgSpec<VoidType>]
-        >,
-        FunctionSpec<
-          "getppid",
-          RetValSpec<IntType>,
-          [ArgSpec<VoidType>]
-        >,
         FunctionSpec<
           "link",
           RetValSpec<IntType>,
diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt
index 94d1042ccbb4a..2db72f40c6f18 100644
--- a/libc/src/__support/OSUtil/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/CMakeLists.txt
@@ -15,3 +15,12 @@ add_object_library(
   DEPENDS
     ${target_os_util}
 )
+
+if(TARGET libc.src.__support.OSUtil.${LIBC_TARGET_OS}.pid)
+  add_object_library(
+    pid
+    ALIAS
+    DEPENDS
+      .${LIBC_TARGET_OS}.pid
+  )
+endif()
diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt
index 78b117fd19439..ddc51aadd0d02 100644
--- a/libc/src/__support/OSUtil/linux/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt
@@ -22,3 +22,16 @@ add_object_library(
     libc.hdr.types.struct_flock64
     libc.hdr.types.struct_f_owner_ex
 )
+
+add_object_library(
+  pid
+  SRCS
+    pid.cpp
+  HDRS
+    ../pid.h
+  DEPENDS
+    libc.src.__support.OSUtil.osutil
+    libc.src.__support.common
+    libc.hdr.types.pid_t
+    libc.include.sys_syscall
+)
diff --git a/libc/src/__support/OSUtil/linux/pid.cpp b/libc/src/__support/OSUtil/linux/pid.cpp
new file mode 100644
index 0000000000000..47ef00565cbe9
--- /dev/null
+++ b/libc/src/__support/OSUtil/linux/pid.cpp
@@ -0,0 +1,20 @@
+//===------------ pid_t utilities implementation ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/pid.h"
+#include "src/__support/OSUtil/syscall.h"
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE {
+
+pid_t ProcessIdentity::cache = -1;
+pid_t ProcessIdentity::get_uncached() {
+  return syscall_impl<pid_t>(SYS_getpid);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/__support/OSUtil/pid.h b/libc/src/__support/OSUtil/pid.h
new file mode 100644
index 0000000000000..5fda6726308e1
--- /dev/null
+++ b/libc/src/__support/OSUtil/pid.h
@@ -0,0 +1,32 @@
+//===------------ pid_t utilities -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H
+#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H
+#include "hdr/types/pid_t.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
+namespace LIBC_NAMESPACE {
+
+class ProcessIdentity {
+  static pid_t cache;
+  static pid_t get_uncached();
+
+public:
+  LIBC_INLINE static void invalidate_cache() { cache = -1; }
+  LIBC_INLINE static void refresh_cache() { cache = get_uncached(); }
+  LIBC_INLINE static pid_t get() {
+    if (LIBC_UNLIKELY(cache < 0))
+      return get_uncached();
+    return cache;
+  }
+};
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_PID_H
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index 9ea0b59befe7a..ef8769ff9498e 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -45,6 +45,7 @@ add_header_library(
     libc.src.__support.CPP.optional
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.stringstream
+    libc.hdr.types.pid_t
 )
 
 if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.thread)
@@ -80,3 +81,21 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.CndVar)
     .${LIBC_TARGET_OS}.CndVar
   )
 endif()
+
+set(tid_dep) # Build-mode-specific dependencies of the tid header library.
+if (LLVM_LIBC_FULL_BUILD)
+  list(APPEND tid_dep libc.src.__support.threads.thread) # tid.h includes threads/thread.h here.
+else()
+  list(APPEND tid_dep libc.src.__support.OSUtil.osutil) # tid.h issues SYS_gettid directly here.
+  list(APPEND tid_dep libc.include.sys_syscall)
+endif()
+
+add_header_library(
+  tid
+  HDRS
+    tid.h
+  DEPENDS
+    libc.src.__support.common
+    libc.hdr.types.pid_t
+    ${tid_dep}
+)
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 95e509b7a825d..832617afb6068 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -55,6 +55,7 @@ add_header_library(
     libc.src.__support.common
     libc.src.__support.OSUtil.osutil
     libc.src.__support.CPP.limits
+    libc.src.__support.threads.tid
   COMPILE_OPTIONS
     -DLIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT=${LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT}
     ${monotonicity_flags}
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 201fe92c37fc0..1faeeec1bb025 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -22,6 +22,7 @@
 #include "src/__support/threads/linux/futex_word.h"
 #include "src/__support/threads/linux/raw_mutex.h"
 #include "src/__support/threads/sleep.h"
+#include "src/__support/threads/tid.h"
 
 #ifndef LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT
 #define LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT 100
@@ -335,8 +336,6 @@ class RwLock {
   LIBC_INLINE Role get_preference() const {
     return static_cast<Role>(preference);
   }
-  // TODO: use cached thread id once implemented.
-  LIBC_INLINE static pid_t gettid() { return syscall_impl<pid_t>(SYS_gettid); }
 
   template <Role role> LIBC_INLINE LockResult try_lock(RwState &old) {
     if constexpr (role == Role::Reader) {
@@ -358,7 +357,7 @@ class RwLock {
         if (LIBC_LIKELY(old.compare_exchange_weak_with(
                 state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE,
                 cpp::MemoryOrder::RELAXED))) {
-          writer_tid.store(gettid(), cpp::MemoryOrder::RELAXED);
+          writer_tid.store(gettid_inline(), cpp::MemoryOrder::RELAXED);
           return LockResult::Success;
         }
         // Notice that old is updated by the compare_exchange_weak_with
@@ -393,7 +392,7 @@ class RwLock {
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     // Phase 1: deadlock detection.
     // A deadlock happens if this is a RAW/WAW lock in the same thread.
-    if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid())
+    if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid_inline())
       return LockResult::Deadlock;
 
 #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
@@ -519,7 +518,7 @@ class RwLock {
     if (old.has_active_writer()) {
       // The lock is held by a writer.
       // Check if we are the owner of the lock.
-      if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid())
+      if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid_inline())
         return LockResult::PermissionDenied;
       // clear writer tid.
       writer_tid.store(0, cpp::MemoryOrder::RELAXED);
diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp
index 1d986ff38cfff..493f2df7f8301 100644
--- a/libc/src/__support/threads/linux/thread.cpp
+++ b/libc/src/__support/threads/linux/thread.cpp
@@ -517,4 +517,6 @@ void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
   __builtin_unreachable();
 }
 
+pid_t Thread::get_uncached_tid() { return syscall_impl<pid_t>(SYS_gettid); }
+
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h
index acfe33879f878..e53ff9995f678 100644
--- a/libc/src/__support/threads/thread.h
+++ b/libc/src/__support/threads/thread.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H
 #define LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H
 
+#include "hdr/types/pid_t.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/optional.h"
 #include "src/__support/CPP/string_view.h"
@@ -99,7 +100,7 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes {
   uintptr_t tls;                // Address to the thread TLS memory
   uintptr_t tls_size;           // The size of area pointed to by |tls|.
   unsigned char owned_stack; // Indicates if the thread owns this stack memory
-  int tid;
+  pid_t tid;
   ThreadStyle style;
   ThreadReturnValue retval;
   ThreadAtExitCallbackMgr *atexit_callback_mgr;
@@ -224,6 +225,16 @@ struct Thread {
 
   // Return the name of the thread in |name|. Return the error number of error.
   int get_name(cpp::StringStream &name) const;
+
+  static pid_t get_uncached_tid();
+
+  LIBC_INLINE void refresh_tid() { this->attrib->tid = get_uncached_tid(); }
+  LIBC_INLINE void invalidate_tid() { this->attrib->tid = -1; }
+  LIBC_INLINE pid_t get_tid() {
+    if (LIBC_UNLIKELY(this->attrib->tid < 0))
+      return get_uncached_tid();
+    return this->attrib->tid;
+  }
 };
 
 extern LIBC_THREAD_LOCAL Thread self;
diff --git a/libc/src/__support/threads/tid.h b/libc/src/__support/threads/tid.h
new file mode 100644
index 0000000000000..622f91543af7e
--- /dev/null
+++ b/libc/src/__support/threads/tid.h
@@ -0,0 +1,34 @@
+//===--- Tid wrapper --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H
+#define LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H
+
+// This header is for internal use only. It automatically dispatches between
+// the full build and overlay build implementations of gettid.
+
+#include "hdr/types/pid_t.h"
+#include "src/__support/common.h"
+#ifdef LIBC_FULL_BUILD
+#include "src/__support/threads/thread.h"
+#else
+#include "src/__support/OSUtil/syscall.h"
+#include <sys/syscall.h>
+#endif // LIBC_FULL_BUILD
+
+namespace LIBC_NAMESPACE {
+LIBC_INLINE pid_t gettid_inline() {
+#ifdef LIBC_FULL_BUILD
+  return self.get_tid();
+#else
+  return syscall_impl<pid_t>(SYS_gettid);
+#endif
+}
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_TID_H
diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt
index 77db76518350c..959595478bb7f 100644
--- a/libc/src/unistd/CMakeLists.txt
+++ b/libc/src/unistd/CMakeLists.txt
@@ -318,3 +318,13 @@ add_entrypoint_external(
 add_entrypoint_external(
   opterr
 )
+
+add_entrypoint_object(
+  gettid
+  SRCS
+    gettid.cpp
+  HDRS
+    gettid.h
+  DEPENDS
+    libc.src.__support.threads.tid
+)
diff --git a/libc/src/unistd/getpid.h b/libc/src/unistd/getpid.h
index 5890dbf63681a..d7f589da81913 100644
--- a/libc/src/unistd/getpid.h
+++ b/libc/src/unistd/getpid.h
@@ -9,11 +9,11 @@
 #ifndef LLVM_LIBC_SRC_UNISTD_GETPID_H
 #define LLVM_LIBC_SRC_UNISTD_GETPID_H
 
-#include <unistd.h>
+#include "hdr/types/pid_t.h"
 
 namespace LIBC_NAMESPACE {
 
-pid_t getpid();
+pid_t getpid(void);
 
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/src/unistd/gettid.cpp b/libc/src/unistd/gettid.cpp
new file mode 100644
index 0000000000000..040a1323dcad1
--- /dev/null
+++ b/libc/src/unistd/gettid.cpp
@@ -0,0 +1,17 @@
+//===-- Implementation file for gettid --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/gettid.h"
+#include "src/__support/common.h"
+#include "src/__support/threads/tid.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(pid_t, gettid, (void)) { return gettid_inline(); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/unistd/gettid.h b/libc/src/unistd/gettid.h
new file mode 100644
index 0000000000000..01691b74895f7
--- /dev/null
+++ b/libc/src/unistd/gettid.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for gettid ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UNISTD_GETTID_H
+#define LLVM_LIBC_SRC_UNISTD_GETTID_H
+
+#include "hdr/types/pid_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+pid_t gettid(void);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_UNISTD_GETTID_H
diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt
index 7d831f9c29c74..4648870c708de 100644
--- a/libc/src/unistd/linux/CMakeLists.txt
+++ b/libc/src/unistd/linux/CMakeLists.txt
@@ -101,6 +101,7 @@ add_entrypoint_object(
     libc.include.sys_syscall
     libc.src.__support.threads.fork_callbacks
     libc.src.__support.OSUtil.osutil
+    libc.src.__support.OSUtil.pid
     libc.src.__support.threads.thread
     libc.src.errno.errno
 )
@@ -190,8 +191,7 @@ add_entrypoint_object(
     ../getpid.h
   DEPENDS
     libc.include.unistd
-    libc.include.sys_syscall
-    libc.src.__support.OSUtil.osutil
+    libc.src.__support.OSUtil.pid
 )
 
 add_entrypoint_object(
diff --git a/libc/src/unistd/linux/fork.cpp b/libc/src/unistd/linux/fork.cpp
index 6fa2b990b19a9..98721e2f5e1d8 100644
--- a/libc/src/unistd/linux/fork.cpp
+++ b/libc/src/unistd/linux/fork.cpp
@@ -8,12 +8,13 @@
 
 #include "src/unistd/fork.h"
 
+#include "src/__support/OSUtil/pid.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 #include "src/__support/threads/fork_callbacks.h"
 #include "src/__support/threads/thread.h" // For thread self object
-
 #include "src/errno/libc_errno.h"
+
 #include <signal.h>      // For SIGCHLD
 #include <sys/syscall.h> // For syscall numbers.
 
@@ -24,6 +25,13 @@ namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
   invoke_prepare_callbacks();
+
+  // Invalidate tid/pid cache before fork to avoid post fork signal handler from
+  // getting wrong values. gettid() is not async-signal-safe, but let's provide
+  // our best efforts here.
+  self.invalidate_tid();
+  ProcessIdentity::invalidate_cache();
+
 #ifdef SYS_fork
   pid_t ret = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_fork);
 #elif defined(SYS_clone)
@@ -31,15 +39,6 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
 #else
 #error "fork and clone syscalls not available."
 #endif
-  if (ret == 0) {
-    // Return value is 0 in the child process.
-    // The child is created with a single thread whose self object will be a
-    // copy of parent process' thread which called fork. So, we have to fix up
-    // the child process' self object with the new process' tid.
-    self.attrib->tid = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_gettid);
-    invoke_child_callbacks();
-    return 0;
-  }
 
   if (ret < 0) {
     // Error case, a child process was not created.
@@ -47,6 +46,15 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
     return -1;
   }
 
+  // Refresh tid/pid cache after fork
+  self.refresh_tid();
+  ProcessIdentity::refresh_cache();
+
+  if (ret == 0) {
+    invoke_child_callbacks();
+    return 0;
+  }
+
   invoke_parent_callbacks();
   return ret;
 }
diff --git a/libc/src/unistd/linux/getpid.cpp b/libc/src/unistd/linux/getpid.cpp
index 85730738ca2ef..7446f2208d596 100644
--- a/libc/src/unistd/linux/getpid.cpp
+++ b/libc/src/unistd/linux/getpid.cpp
@@ -7,16 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/unistd/getpid.h"
-
-#include "src/__support/OSUtil/syscall.h" // For internal syscall function.
+#include "src/__support/OSUtil/pid.h"
 #include "src/__support/common.h"
-
-#include <sys/syscall.h> // For syscall numbers.
-
 namespace LIBC_NAMESPACE {
 
-LLVM_LIBC_FUNCTION(pid_t, getpid, ()) {
-  return LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_getpid);
-}
+LLVM_LIBC_FUNCTION(pid_t, getpid, (void)) { return ProcessIdentity::get(); }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index de3e8d9ccbb62..a526fa992eec1 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -376,6 +376,16 @@ add_libc_unittest(
     libc.src.unistd.getpid
 )
 
+add_libc_unittest(
+  gettid_test
+  SUITE
+    libc_unistd_unittests
+  SRCS
+    gettid_test.cpp
+  DEPENDS
+    libc.src.unistd.gettid
+)
+
 add_libc_unittest(
   getppid_test
   SUITE
diff --git a/libc/test/src/unistd/gettid_test.cpp b/libc/test/src/unistd/gettid_test.cpp
new file mode 100644
index 0000000000000..c2330f4002279
--- /dev/null
+++ b/libc/test/src/unistd/gettid_test.cpp
@@ -0,0 +1,15 @@
+//===-- Unittests for gettid ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/unistd/gettid.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcGetTidTest, SmokeTest) {
+  // gettid always succeeds. So, we just call it as a smoke test.
+  ASSERT_GT(LIBC_NAMESPACE::gettid(), 0);
+}

>From 084ad55f547b52b58debad971580842bf8979232 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Tue, 18 Jun 2024 11:31:55 -0700
Subject: [PATCH 2/5] initialize pid cache in startup

---
 libc/startup/linux/CMakeLists.txt | 1 +
 libc/startup/linux/do_start.cpp   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/libc/startup/linux/CMakeLists.txt b/libc/startup/linux/CMakeLists.txt
index 68f68ff45aa9e..f42fa3baeb290 100644
--- a/libc/startup/linux/CMakeLists.txt
+++ b/libc/startup/linux/CMakeLists.txt
@@ -96,6 +96,7 @@ add_object_library(
     libc.include.sys_syscall
     libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
+    libc.src.__support.OSUtil.pid
     libc.src.stdlib.exit
     libc.src.stdlib.atexit
     libc.src.unistd.environ
diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
index 55fd575f7ad0b..bc24a054726c3 100644
--- a/libc/startup/linux/do_start.cpp
+++ b/libc/startup/linux/do_start.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "startup/linux/do_start.h"
+#include "src/__support/OSUtil/pid.h"
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/threads/thread.h"
 #include "src/stdlib/atexit.h"
@@ -63,6 +64,7 @@ static ThreadAttributes main_thread_attrib;
   auto tid = syscall_impl<long>(SYS_gettid);
   if (tid <= 0)
     syscall_impl<long>(SYS_exit, 1);
+  ProcessIdentity::refresh_cache();
   main_thread_attrib.tid = static_cast<int>(tid);
 
   // After the argv array, is a 8-byte long NULL value before the array of env

>From 0edf09f3bd9b2ce71c22a1ad7d3e95eee3e9249b Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Wed, 19 Jun 2024 12:04:21 -0700
Subject: [PATCH 3/5] add TLS-based fork-inflight guard

---
 libc/src/__support/OSUtil/linux/pid.cpp | 1 +
 libc/src/__support/OSUtil/pid.h         | 6 ++++--
 libc/src/unistd/linux/fork.cpp          | 9 +++++++--
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/libc/src/__support/OSUtil/linux/pid.cpp b/libc/src/__support/OSUtil/linux/pid.cpp
index 47ef00565cbe9..632e250e40f0a 100644
--- a/libc/src/__support/OSUtil/linux/pid.cpp
+++ b/libc/src/__support/OSUtil/linux/pid.cpp
@@ -13,6 +13,7 @@
 namespace LIBC_NAMESPACE {
 
 pid_t ProcessIdentity::cache = -1;
+thread_local bool ProcessIdentity::fork_inflight = false;
 pid_t ProcessIdentity::get_uncached() {
   return syscall_impl<pid_t>(SYS_getpid);
 }
diff --git a/libc/src/__support/OSUtil/pid.h b/libc/src/__support/OSUtil/pid.h
index 5fda6726308e1..6349f06125d30 100644
--- a/libc/src/__support/OSUtil/pid.h
+++ b/libc/src/__support/OSUtil/pid.h
@@ -14,14 +14,16 @@
 namespace LIBC_NAMESPACE {
 
 class ProcessIdentity {
+  static thread_local bool fork_inflight;
   static pid_t cache;
   static pid_t get_uncached();
 
 public:
-  LIBC_INLINE static void invalidate_cache() { cache = -1; }
+  LIBC_INLINE static void start_fork() { fork_inflight = true; }
+  LIBC_INLINE static void end_fork() { fork_inflight = false; }
   LIBC_INLINE static void refresh_cache() { cache = get_uncached(); }
   LIBC_INLINE static pid_t get() {
-    if (LIBC_UNLIKELY(cache < 0))
+    if (LIBC_UNLIKELY(fork_inflight))
       return get_uncached();
     return cache;
   }
diff --git a/libc/src/unistd/linux/fork.cpp b/libc/src/unistd/linux/fork.cpp
index 98721e2f5e1d8..f7fb5a9a42cd9 100644
--- a/libc/src/unistd/linux/fork.cpp
+++ b/libc/src/unistd/linux/fork.cpp
@@ -30,7 +30,7 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
   // getting wrong values. gettid() is not async-signal-safe, but let's provide
   // our best efforts here.
   self.invalidate_tid();
-  ProcessIdentity::invalidate_cache();
+  ProcessIdentity::start_fork();
 
 #ifdef SYS_fork
   pid_t ret = LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_fork);
@@ -46,15 +46,20 @@ LLVM_LIBC_FUNCTION(pid_t, fork, (void)) {
     return -1;
   }
 
+  // Common
   // Refresh tid/pid cache after fork
   self.refresh_tid();
-  ProcessIdentity::refresh_cache();
 
+  // Child process
   if (ret == 0) {
+    ProcessIdentity::refresh_cache();
+    ProcessIdentity::end_fork();
     invoke_child_callbacks();
     return 0;
   }
 
+  // Parent process
+  ProcessIdentity::end_fork();
   invoke_parent_callbacks();
   return ret;
 }

>From 030fe3acc7715ca2097d750b35b08a9d9b628dfb Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Wed, 19 Jun 2024 12:33:03 -0700
Subject: [PATCH 4/5] add options and docs

---
 libc/config/config.json                   | 12 +++++++++++-
 libc/docs/configure.rst                   |  3 +++
 libc/docs/dev/undefined_behavior.rst      |  4 +++-
 libc/src/__support/OSUtil/CMakeLists.txt  |  8 ++++++++
 libc/src/__support/OSUtil/pid.h           |  9 +++++++++
 libc/src/__support/threads/CMakeLists.txt |  8 ++++++++
 libc/src/__support/threads/thread.h       |  9 +++++++++
 7 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/libc/config/config.json b/libc/config/config.json
index 8d6a84e732597..c7b570ac719d0 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -54,5 +54,15 @@
       "value": 100,
       "doc": "Default number of spins before blocking if a rwlock is in contention (default to 100)."
     }
+  },
+  "unistd": {
+    "LIBC_CONF_ENABLE_TID_CACHE": {
+      "value": true,
+      "doc": "Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications."
+    },
+    "LIBC_CONF_ENABLE_PID_CACHE": {
+      "value": true,
+      "doc": "Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications."
+    }
   }
-}
+}
\ No newline at end of file
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index bdae6c54052f2..ef74161aae92e 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -41,3 +41,6 @@ to learn about the defaults for your platform and target.
 * **"string" options**
     - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
     - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen.
+* **"unistd" options**
+    - ``LIBC_CONF_ENABLE_PID_CACHE``: Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications.
+    - ``LIBC_CONF_ENABLE_TID_CACHE``: Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications.
diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst
index 9f31aeff9ab97..9894441be0307 100644
--- a/libc/docs/dev/undefined_behavior.rst
+++ b/libc/docs/dev/undefined_behavior.rst
@@ -95,7 +95,9 @@ Cached ``getpid/gettid``
 Since version ``2.25``, glibc removes its cache mechanism for ``getpid/gettid`` 
 (See the history section in https://man7.org/linux/man-pages/man2/getpid.2.html).
 LLVM's libc still implements the cache as it is useful for fast deadlock detection.
-The cache mechanism is also implemented in MUSL and bionic.
+The cache mechanism is also implemented in MUSL and bionic. The tid/pid cache can 
+be disabled by setting ``LIBC_CONF_ENABLE_TID_CACHE`` and ``LIBC_CONF_ENABLE_PID_CACHE``
+to ``false`` respectively.
 
 Unwrapped ``SYS_clone/SYS_fork/SYS_vfork``
 ------------------------------------------
diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt
index 2db72f40c6f18..517f888178718 100644
--- a/libc/src/__support/OSUtil/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/CMakeLists.txt
@@ -16,11 +16,19 @@ add_object_library(
     ${target_os_util}
 )
 
+if (LIBC_CONF_ENABLE_PID_CACHE)
+  set(libc_copt_enable_pid_cache 1)
+else()
+  set(libc_copt_enable_pid_cache 0)
+endif()
+
 if(TARGET libc.src.__support.OSUtil.${LIBC_TARGET_OS}.pid)
   add_object_library(
     pid
     ALIAS
     DEPENDS
       .${LIBC_TARGET_OS}.pid
+    COMPILE_OPTIONS
+      -DLIBC_COPT_ENABLE_PID_CACHE=${libc_copt_enable_pid_cache}
   )
 endif()
diff --git a/libc/src/__support/OSUtil/pid.h b/libc/src/__support/OSUtil/pid.h
index 6349f06125d30..4183c82df3655 100644
--- a/libc/src/__support/OSUtil/pid.h
+++ b/libc/src/__support/OSUtil/pid.h
@@ -11,6 +11,11 @@
 #include "hdr/types/pid_t.h"
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/optimization.h"
+
+#ifndef LIBC_COPT_ENABLE_PID_CACHE
+#define LIBC_COPT_ENABLE_PID_CACHE 1
+#endif
+
 namespace LIBC_NAMESPACE {
 
 class ProcessIdentity {
@@ -23,9 +28,13 @@ class ProcessIdentity {
   LIBC_INLINE static void end_fork() { fork_inflight = false; }
   LIBC_INLINE static void refresh_cache() { cache = get_uncached(); }
   LIBC_INLINE static pid_t get() {
+#if LIBC_COPT_ENABLE_PID_CACHE
     if (LIBC_UNLIKELY(fork_inflight))
       return get_uncached();
     return cache;
+#else // Cache compiled out: every call goes through get_uncached().
+    return get_uncached();
+#endif // LIBC_COPT_ENABLE_PID_CACHE
   }
 };
 
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index ef8769ff9498e..55ea474388072 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -35,6 +35,12 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.mutex)
   )
 endif()
 
+if (LIBC_CONF_ENABLE_TID_CACHE)
+  set(libc_copt_enable_tid_cache 1)
+else()
+  set(libc_copt_enable_tid_cache 0)
+endif()
+
 add_header_library(
   thread_common
   HDRS
@@ -46,6 +52,8 @@ add_header_library(
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.stringstream
     libc.hdr.types.pid_t
+  COMPILE_OPTIONS
+    -DLIBC_COPT_ENABLE_TID_CACHE=${libc_copt_enable_tid_cache}
 )
 
 if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.thread)
diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h
index e53ff9995f678..5f4d9a2224c36 100644
--- a/libc/src/__support/threads/thread.h
+++ b/libc/src/__support/threads/thread.h
@@ -9,6 +9,10 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H
 #define LLVM_LIBC_SRC___SUPPORT_THREADS_THREAD_H
 
+#ifndef LIBC_COPT_ENABLE_TID_CACHE
+#define LIBC_COPT_ENABLE_TID_CACHE 1
+#endif
+
 #include "hdr/types/pid_t.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/optional.h"
@@ -230,10 +234,15 @@ struct Thread {
 
   LIBC_INLINE void refresh_tid() { this->attrib->tid = get_uncached_tid(); }
   LIBC_INLINE void invalidate_tid() { this->attrib->tid = -1; }
+
   LIBC_INLINE pid_t get_tid() {
+#if LIBC_COPT_ENABLE_TID_CACHE
     if (LIBC_UNLIKELY(this->attrib->tid < 0))
       return get_uncached_tid();
     return this->attrib->tid;
+#else // Cache compiled out: every call goes through get_uncached_tid().
+    return get_uncached_tid();
+#endif // LIBC_COPT_ENABLE_TID_CACHE
   }
 };
 

>From cf311c007d527e4587de6a4b56827602155497c7 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Wed, 19 Jun 2024 12:44:06 -0700
Subject: [PATCH 5/5] add integration test

---
 libc/docs/configure.rst                       |  2 +-
 .../integration/src/unistd/CMakeLists.txt     |  4 ++++
 .../test/integration/src/unistd/fork_test.cpp | 24 ++++++++++++++++++-
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index ef74161aae92e..44aa7d79544ab 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -43,4 +43,4 @@ to learn about the defaults for your platform and target.
     - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen.
 * **"unistd" options**
     - ``LIBC_CONF_ENABLE_PID_CACHE``: Enable caching mechanism for getpid to avoid syscall (default to true). Please refer to Undefined Behavior documentation for implications.
-    - ``LIBC_CONF_ENABLE_TID_CACHE``: Enable caching mechanism for gettid to avoid syscall (only effective in fulldefault to true). Please refer to Undefined Behavior documentation for implications.
+    - ``LIBC_CONF_ENABLE_TID_CACHE``: Enable caching mechanism for gettid to avoid syscall (only effective in fullbuild mode, default to true). Please refer to Undefined Behavior documentation for implications.
diff --git a/libc/test/integration/src/unistd/CMakeLists.txt b/libc/test/integration/src/unistd/CMakeLists.txt
index 3f18231209512..f50405d0925e2 100644
--- a/libc/test/integration/src/unistd/CMakeLists.txt
+++ b/libc/test/integration/src/unistd/CMakeLists.txt
@@ -31,6 +31,10 @@ add_integration_test(
     libc.src.sys.wait.wait4
     libc.src.sys.wait.waitpid
     libc.src.unistd.fork
+    libc.src.unistd.getpid
+    libc.src.unistd.gettid
+    libc.src.stdlib.exit
+    libc.include.sys_syscall
 )
 
 if((${LIBC_TARGET_OS} STREQUAL "linux") AND (${LIBC_TARGET_ARCHITECTURE_IS_X86}))
diff --git a/libc/test/integration/src/unistd/fork_test.cpp b/libc/test/integration/src/unistd/fork_test.cpp
index 9c9213ed46316..4b82d5f195627 100644
--- a/libc/test/integration/src/unistd/fork_test.cpp
+++ b/libc/test/integration/src/unistd/fork_test.cpp
@@ -6,17 +6,21 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/OSUtil/syscall.h"
 #include "src/pthread/pthread_atfork.h"
 #include "src/signal/raise.h"
+#include "src/stdlib/exit.h"
 #include "src/sys/wait/wait.h"
 #include "src/sys/wait/wait4.h"
 #include "src/sys/wait/waitpid.h"
 #include "src/unistd/fork.h"
-
+#include "src/unistd/getpid.h"
+#include "src/unistd/gettid.h"
 #include "test/IntegrationTest/test.h"
 
 #include <errno.h>
 #include <signal.h>
+#include <sys/syscall.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
@@ -140,7 +144,25 @@ void fork_with_atfork_callbacks() {
   ASSERT_NE(child, DONE);
 }
 
+// Verify getpid/gettid agree with the raw syscalls on both sides of the fork.
+void fork_pid_tid_test() {
+  pid_t pid = LIBC_NAMESPACE::fork();
+  ASSERT_TRUE(pid >= 0);
+  ASSERT_EQ(LIBC_NAMESPACE::gettid(),
+            LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_gettid));
+  ASSERT_EQ(LIBC_NAMESPACE::getpid(),
+            LIBC_NAMESPACE::syscall_impl<pid_t>(SYS_getpid));
+  if (pid == 0) {
+    LIBC_NAMESPACE::exit(0);
+  } else {
+    int status;
+    ASSERT_EQ(LIBC_NAMESPACE::waitpid(pid, &status, 0), pid);
+    ASSERT_EQ(status, 0); // POSIX: status is 0 iff the child exited with 0.
+  }
+}
+
 TEST_MAIN(int argc, char **argv, char **envp) {
+  fork_pid_tid_test();
   fork_and_wait_normal_exit();
   fork_and_wait4_normal_exit();
   fork_and_waitpid_normal_exit();



More information about the libc-commits mailing list