[libc-commits] [libc] 859c189 - [libc] Linux threads - Setup TLS area of a new thread and cleanup at exit.

Siva Chandra Reddy via libc-commits libc-commits at lists.llvm.org
Wed Jul 13 13:46:25 PDT 2022


Author: Siva Chandra Reddy
Date: 2022-07-13T20:46:18Z
New Revision: 859c1897275ca33c31537381a862458df0653a69

URL: https://github.com/llvm/llvm-project/commit/859c1897275ca33c31537381a862458df0653a69
DIFF: https://github.com/llvm/llvm-project/commit/859c1897275ca33c31537381a862458df0653a69.diff

LOG: [libc] Linux threads - Setup TLS area of a new thread and cleanup at exit.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D129543

Added: 
    libc/test/integration/src/__support/threads/thread_tls_test.cpp

Modified: 
    libc/config/linux/app.h
    libc/loader/linux/aarch64/start.cpp
    libc/loader/linux/x86_64/CMakeLists.txt
    libc/loader/linux/x86_64/start.cpp
    libc/src/__support/threads/linux/CMakeLists.txt
    libc/src/__support/threads/linux/thread.cpp
    libc/src/__support/threads/thread.h
    libc/test/integration/src/__support/threads/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/app.h b/libc/config/linux/app.h
index e835367413abc..c4cf6dfea5bb4 100644
--- a/libc/config/linux/app.h
+++ b/libc/config/linux/app.h
@@ -15,8 +15,8 @@
 
 namespace __llvm_libc {
 
-// Data structure to capture properties of the linux/ELF TLS.
-struct TLS {
+// Data structure to capture properties of the linux/ELF TLS image.
+struct TLSImage {
   // The load address of the TLS.
   uintptr_t address;
 
@@ -68,8 +68,8 @@ struct AppProperties {
 
   Args *args;
 
-  // The properties of an application's TLS.
-  TLS tls;
+  // The properties of an application's TLS image.
+  TLSImage tls;
 
   // Environment data.
   uint64_t *envPtr;
@@ -77,9 +77,29 @@ struct AppProperties {
 
 extern AppProperties app;
 
-// Creates and initializes the TLS area for the current thread. Should not
+// The descriptor of a thread's TLS area.
+struct TLSDescriptor {
+  // The size of the TLS area.
+  uintptr_t size = 0;
+
+  // The address of the TLS area. This address can be passed to cleanup
+  // functions like munmap.
+  uintptr_t addr = 0;
+
+  // The value the thread pointer register should be initialized to.
+  // Note that, dependending the target architecture ABI, it can be the
+  // same as |addr| or something else.
+  uintptr_t tp = 0;
+
+  constexpr TLSDescriptor() = default;
+};
+
+// Create and initialize the TLS area for the current thread. Should not
 // be called before app.tls has been initialized.
-void initTLS();
+void init_tls(TLSDescriptor &tls);
+
+// Cleanup the TLS area as described in |tls_descriptor|.
+void cleanup_tls(uintptr_t tls_addr, uintptr_t tls_size);
 
 } // namespace __llvm_libc
 

diff  --git a/libc/loader/linux/aarch64/start.cpp b/libc/loader/linux/aarch64/start.cpp
index 53308c1e74723..5835c838078c3 100644
--- a/libc/loader/linux/aarch64/start.cpp
+++ b/libc/loader/linux/aarch64/start.cpp
@@ -14,7 +14,6 @@
 
 #include <linux/auxvec.h>
 #include <linux/elf.h>
-#include <stddef.h>
 #include <stdint.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
@@ -36,9 +35,12 @@ static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
 
 AppProperties app;
 
-void initTLS() {
-  if (app.tls.size == 0)
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
     return;
+  }
 
   // aarch64 follows the variant 1 TLS layout:
   //
@@ -49,14 +51,14 @@ void initTLS() {
   //
   // The thread pointer points to the first entry.
 
-  const size_t size_of_pointers = 2 * sizeof(uintptr_t);
-  size_t padding = 0;
-  const size_t ALIGNMENT_MASK = app.tls.align - 1;
-  size_t 
diff  = size_of_pointers & ALIGNMENT_MASK;
+  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
+  uintptr_t padding = 0;
+  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
+  uintptr_t 
diff  = size_of_pointers & ALIGNMENT_MASK;
   if (
diff  != 0)
     padding += (ALIGNMENT_MASK - 
diff ) + 1;
 
-  size_t alloc_size = size_of_pointers + padding + app.tls.size;
+  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
 
   // We cannot call the mmap function here as the functions set errno on
   // failure. Since errno is implemented via a thread local variable, we cannot
@@ -73,9 +75,19 @@ void initTLS() {
   __llvm_libc::inline_memcpy(reinterpret_cast<char *>(tls_addr),
                              reinterpret_cast<const char *>(app.tls.address),
                              app.tls.init_size);
-  __arm_wsr64("tpidr_el0", thread_ptr);
+  tls_descriptor.size = alloc_size;
+  tls_descriptor.addr = thread_ptr;
+  tls_descriptor.tp = thread_ptr;
 }
 
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  __llvm_libc::syscall(SYS_munmap, addr, size);
+}
+
+static void set_thread_ptr(uintptr_t val) { __arm_wsr64("tpidr_el0", val); }
+
 } // namespace __llvm_libc
 
 using __llvm_libc::app;
@@ -134,9 +146,13 @@ extern "C" void _start() {
     app.tls.align = phdr->p_align;
   }
 
-  __llvm_libc::initTLS();
+  __llvm_libc::TLSDescriptor tls;
+  __llvm_libc::init_tls(tls);
+  if (tls.size != 0)
+    __llvm_libc::set_thread_ptr(tls.tp);
 
-  __llvm_libc::syscall(SYS_exit, main(app.args->argc,
-                                      reinterpret_cast<char **>(app.args->argv),
-                                      reinterpret_cast<char **>(env_ptr)));
+  int retval = main(app.args->argc, reinterpret_cast<char **>(app.args->argv),
+                    reinterpret_cast<char **>(env_ptr));
+  __llvm_libc::cleanup_tls(tls.addr, tls.size);
+  __llvm_libc::syscall(SYS_exit, retval);
 }

diff  --git a/libc/loader/linux/x86_64/CMakeLists.txt b/libc/loader/linux/x86_64/CMakeLists.txt
index 6c2e615290fa1..47c3d38e24e8e 100644
--- a/libc/loader/linux/x86_64/CMakeLists.txt
+++ b/libc/loader/linux/x86_64/CMakeLists.txt
@@ -11,4 +11,5 @@ add_loader_object(
   COMPILE_OPTIONS
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.
+    -fno-builtin
 )

diff  --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp
index dae0d970f221b..514caaf4dca63 100644
--- a/libc/loader/linux/x86_64/start.cpp
+++ b/libc/loader/linux/x86_64/start.cpp
@@ -33,9 +33,12 @@ AppProperties app;
 
 // TODO: The function is x86_64 specific. Move it to config/linux/app.h
 // and generalize it. Also, dynamic loading is not handled currently.
-void initTLS() {
-  if (app.tls.size == 0)
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
     return;
+  }
 
   // We will assume the alignment is always a power of two.
   uintptr_t tlsSize = app.tls.size & -app.tls.align;
@@ -45,7 +48,7 @@ void initTLS() {
   // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the
   // address of the TLS block. So, we add more size to accomodate this address
   // entry.
-  size_t tlsSizeWithAddr = tlsSize + sizeof(uintptr_t);
+  uintptr_t tlsSizeWithAddr = tlsSize + sizeof(uintptr_t);
 
   // We cannot call the mmap function here as the functions set errno on
   // failure. Since errno is implemented via a thread local variable, we cannot
@@ -67,8 +70,21 @@ void initTLS() {
   __llvm_libc::inline_memcpy(reinterpret_cast<char *>(tlsAddr),
                              reinterpret_cast<const char *>(app.tls.address),
                              app.tls.init_size);
-  if (__llvm_libc::syscall(SYS_arch_prctl, ARCH_SET_FS, endPtr) == -1)
-    __llvm_libc::syscall(SYS_exit, 1);
+
+  tls_descriptor = {tlsSizeWithAddr, uintptr_t(tlsAddr), endPtr};
+  return;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  __llvm_libc::syscall(SYS_munmap, addr, size);
+}
+
+// Sets the thread pointer to |val|. Returns true on success, false on failure.
+static bool set_thread_ptr(uintptr_t val) {
+  return __llvm_libc::syscall(SYS_arch_prctl, ARCH_SET_FS, val) == -1 ? false
+                                                                      : true;
 }
 
 } // namespace __llvm_libc
@@ -144,9 +160,13 @@ extern "C" void _start() {
     app.tls.align = phdr->p_align;
   }
 
-  __llvm_libc::initTLS();
+  __llvm_libc::TLSDescriptor tls;
+  __llvm_libc::init_tls(tls);
+  if (tls.size != 0 && !__llvm_libc::set_thread_ptr(tls.tp))
+    __llvm_libc::syscall(SYS_exit, 1);
 
-  __llvm_libc::syscall(SYS_exit, main(app.args->argc,
-                                      reinterpret_cast<char **>(app.args->argv),
-                                      reinterpret_cast<char **>(env_ptr)));
+  int retval = main(app.args->argc, reinterpret_cast<char **>(app.args->argv),
+                    reinterpret_cast<char **>(env_ptr));
+  __llvm_libc::cleanup_tls(tls.addr, tls.size);
+  __llvm_libc::syscall(SYS_exit, retval);
 }

diff  --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 058e7c2a369ed..5c7eb3ad8683b 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -26,6 +26,7 @@ add_object_library(
     thread.cpp
   DEPENDS
     .futex_word_type
+    libc.config.linux.app_h
     libc.include.sys_syscall
     libc.src.__support.CPP.atomic
     libc.src.__support.CPP.error

diff  --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp
index 89b3b4ff518a8..e5b73d43c0a4b 100644
--- a/libc/src/__support/threads/linux/thread.cpp
+++ b/libc/src/__support/threads/linux/thread.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/__support/threads/thread.h"
+#include "config/linux/app.h"
 #include "src/__support/CPP/atomic.h"
 #include "src/__support/CPP/error.h"
 #include "src/__support/OSUtil/syscall.h"           // For syscall functions.
@@ -42,11 +43,10 @@ static constexpr unsigned CLONE_SYSCALL_FLAGS =
     | CLONE_THREAD  // Same thread group as the parent.
     | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
                     // values
-    | CLONE_PARENT_SETTID   // Set child thread ID in |ptid| of the parent.
-    | CLONE_CHILD_CLEARTID; // Let the kernel clear the tid address
-                            // wake the joining thread.
-// TODO: Add the CLONE_SETTLS flag and setup the TLS area correctly
-// when making the clone syscall.
+    | CLONE_PARENT_SETTID  // Set child thread ID in |ptid| of the parent.
+    | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address
+                           // wake the joining thread.
+    | CLONE_SETTLS;        // Setup the thread pointer of the new thread.
 
 static inline cpp::ErrorOr<void *> alloc_stack(size_t size) {
   long mmap_result =
@@ -80,6 +80,14 @@ struct alignas(STACK_ALIGNMENT) StartArgs {
   void *arg;
 };
 
+static void cleanup_thread_resources(ThreadAttributes *attrib) {
+  // Cleanup the TLS before the stack as the TLS information is stored on
+  // the stack.
+  cleanup_tls(attrib->tls, attrib->tls_size);
+  if (attrib->owned_stack)
+    free_stack(attrib->stack, attrib->stack_size);
+}
+
 __attribute__((always_inline)) inline uintptr_t get_start_args_addr() {
 // NOTE: For __builtin_frame_address to work reliably across compilers,
 // architectures and various optimization levels, the TU including this file
@@ -118,8 +126,7 @@ static void start_thread() {
   if (!attrib->detach_state.compare_exchange_strong(
           joinable_state, uint32_t(DetachState::EXITING))) {
     // Thread is detached so cleanup the resources.
-    if (attrib->owned_stack)
-      free_stack(attrib->stack, attrib->stack_size);
+    cleanup_thread_resources(attrib);
 
     // Set the CLEAR_TID address to nullptr to prevent the kernel
     // from signalling at a non-existent futex location.
@@ -143,6 +150,9 @@ int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
     owned_stack = true;
   }
 
+  TLSDescriptor tls;
+  init_tls(tls);
+
   // When the new thread is spawned by the kernel, the new thread gets the
   // stack we pass to the clone syscall. However, this stack is empty and does
   // not have any local vars present in this function. Hence, one cannot
@@ -167,6 +177,8 @@ int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
   attrib->stack = stack;
   attrib->stack_size = size;
   attrib->owned_stack = owned_stack;
+  attrib->tls = tls.addr;
+  attrib->tls_size = tls.size;
 
   start_args->thread_attrib = attrib;
   start_args->runner = runner;
@@ -187,14 +199,14 @@ int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
       SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
       &attrib->tid,    // The address where the child tid is written
       &clear_tid->val, // The futex where the child thread status is signalled
-      0                // Set TLS to null for now.
+      tls.tp           // The thread pointer value for the new thread.
   );
 #elif defined(LLVM_LIBC_ARCH_AARCH64)
   long register clone_result asm("x0");
   clone_result = __llvm_libc::syscall(
       SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
       &attrib->tid,   // The address where the child tid is written
-      0,              // Set TLS to null for now.
+      tls.tp,         // The thread pointer value for the new thread.
       &clear_tid->val // The futex where the child thread status is signalled
   );
 #else
@@ -209,8 +221,7 @@ int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
 #endif
     start_thread();
   } else if (clone_result < 0) {
-    if (attrib->owned_stack)
-      free_stack(attrib->stack, attrib->stack_size);
+    cleanup_thread_resources(attrib);
     return -clone_result;
   }
 
@@ -225,8 +236,7 @@ int Thread::join(ThreadReturnValue &retval) {
   else
     retval.stdc_retval = attrib->retval.stdc_retval;
 
-  if (attrib->owned_stack)
-    free_stack(attrib->stack, attrib->stack_size);
+  cleanup_thread_resources(attrib);
 
   return 0;
 }
@@ -243,8 +253,8 @@ int Thread::detach() {
   // and free up resources.
   wait();
 
-  if (attrib->owned_stack)
-    free_stack(attrib->stack, attrib->stack_size);
+  cleanup_thread_resources(attrib);
+
   return int(DetachType::CLEANUP);
 }
 

diff  --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h
index dfafbed3b5f6c..7f965923391d1 100644
--- a/libc/src/__support/threads/thread.h
+++ b/libc/src/__support/threads/thread.h
@@ -81,9 +81,10 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes {
   //          exits. It will clean up the thread resources once the thread
   //          exits.
   cpp::Atomic<uint32_t> detach_state;
-  void *stack; // Pointer to the thread stack
-  void *tls;
+  void *stack;                   // Pointer to the thread stack
   unsigned long long stack_size; // Size of the stack
+  uintptr_t tls;                 // Address to the thread TLS memory
+  uintptr_t tls_size;            // The size of area pointed to by |tls|.
   unsigned char owned_stack; // Indicates if the thread owns this stack memory
   int tid;
   ThreadStyle style;
@@ -92,10 +93,8 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes {
 
   constexpr ThreadAttributes()
       : detach_state(uint32_t(DetachState::DETACHED)), stack(nullptr),
-        tls(nullptr), stack_size(0), owned_stack(false), tid(-1),
-        style(ThreadStyle::POSIX), retval(),
-        platform_data(nullptr) {
-  }
+        stack_size(0), tls(0), tls_size(0), owned_stack(false), tid(-1),
+        style(ThreadStyle::POSIX), retval(), platform_data(nullptr) {}
 };
 
 struct Thread {

diff  --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt
index cbf117b7ef40e..48e0e2c0eb0f2 100644
--- a/libc/test/integration/src/__support/threads/CMakeLists.txt
+++ b/libc/test/integration/src/__support/threads/CMakeLists.txt
@@ -17,3 +17,15 @@ add_integration_test(
     libc.src.__support.threads.mutex
     libc.src.__support.threads.thread
 )
+
+add_integration_test(
+  thread_tls_test
+  SUITE
+    libc-support-threads-integration-tests
+  SRCS
+    thread_tls_test.cpp
+  LOADER
+    libc.loader.linux.crt1
+  DEPENDS
+    libc.src.__support.threads.thread
+)

diff  --git a/libc/test/integration/src/__support/threads/thread_tls_test.cpp b/libc/test/integration/src/__support/threads/thread_tls_test.cpp
new file mode 100644
index 0000000000000..3fdd2ae15f9fc
--- /dev/null
+++ b/libc/test/integration/src/__support/threads/thread_tls_test.cpp
@@ -0,0 +1,46 @@
+//===-- Test handling of thread local data --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/threads/thread.h"
+#include "utils/IntegrationTest/test.h"
+
+static constexpr int INIT_VAL = 100;
+static constexpr int UPDATE_VAL = 123;
+
+static thread_local int tlval = INIT_VAL;
+
+// This function updates the tlval and returns its old value.
+int func(void *) {
+  int old_tlval = tlval;
+  tlval = UPDATE_VAL;
+  return old_tlval;
+}
+
+void thread_local_test() {
+  int retval;
+
+  __llvm_libc::Thread th1;
+  th1.run(func, nullptr, nullptr, 0);
+  th1.join(&retval);
+  ASSERT_EQ(retval, INIT_VAL);
+
+  __llvm_libc::Thread th2;
+  th2.run(func, nullptr, nullptr, 0);
+  th2.join(&retval);
+  ASSERT_EQ(retval, INIT_VAL);
+}
+
+int main() {
+  // From the main thread, we will update the main thread's tlval.
+  // This should not affect the child thread's tlval;
+  ASSERT_EQ(tlval, INIT_VAL);
+  tlval = UPDATE_VAL;
+  ASSERT_EQ(tlval, UPDATE_VAL);
+  thread_local_test();
+  return 0;
+}


        


More information about the libc-commits mailing list