[libc-commits] [libc] 2bc9944 - [libc] major refactor of startup library (#76092)

via libc-commits libc-commits at lists.llvm.org
Thu Jan 4 12:51:18 PST 2024


Author: Schrodinger ZHU Yifan
Date: 2024-01-04T12:51:14-08:00
New Revision: 2bc994456c5be2ab6d98b94de2349302577a9823

URL: https://github.com/llvm/llvm-project/commit/2bc994456c5be2ab6d98b94de2349302577a9823
DIFF: https://github.com/llvm/llvm-project/commit/2bc994456c5be2ab6d98b94de2349302577a9823.diff

LOG: [libc] major refactor of startup library (#76092)

* separate initialization routines into _start and do_start for all
architectures.
* lift do_start as a separate object library to avoid code duplication.
* (additionally) address the problem of building hermetic libc with
-fstack-protector-*

The `crt1.o` is now a merged result of three components:

```
___
  |___ x86_64
  |      |_______ start.cpp.o    <- _start (loads process initial stack and aligns stack pointer)
  |      |_______ tls.cpp.o      <- init_tls, cleanup_tls, set_thread_ptr (TLS related routines)
  |___ do_start.cpp.o            <- do_start (sets up global variables and invokes the main function) 
```

Added: 
    libc/startup/linux/aarch64/tls.cpp
    libc/startup/linux/do_start.cpp
    libc/startup/linux/do_start.h
    libc/startup/linux/riscv/tls.cpp
    libc/startup/linux/x86_64/tls.cpp

Modified: 
    libc/cmake/modules/LLVMLibCTestRules.cmake
    libc/config/linux/app.h
    libc/startup/linux/CMakeLists.txt
    libc/startup/linux/aarch64/CMakeLists.txt
    libc/startup/linux/aarch64/start.cpp
    libc/startup/linux/riscv/CMakeLists.txt
    libc/startup/linux/riscv/start.cpp
    libc/startup/linux/x86_64/CMakeLists.txt
    libc/startup/linux/x86_64/start.cpp

Removed: 
    


################################################################################
diff  --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index 51d484b875aeff..b69839afebf8a1 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -498,6 +498,9 @@ function(add_integration_test test_name)
       libc.src.string.memcpy
       libc.src.string.memmove
       libc.src.string.memset
+      # __stack_chk_fail should always be included to allow building libc with
+      # stack protector.
+      libc.src.compiler.__stack_chk_fail
   )
   list(REMOVE_DUPLICATES fq_deps_list)
 
@@ -665,6 +668,9 @@ function(add_libc_hermetic_test test_name)
       libc.src.string.memmove
       libc.src.string.memset
       libc.src.__support.StringUtil.error_to_string
+      # __stack_chk_fail should always be included to allow building libc with
+      # stack protector.
+      libc.src.compiler.__stack_chk_fail
   )
 
   if(TARGET libc.src.time.clock)

diff  --git a/libc/config/linux/app.h b/libc/config/linux/app.h
index 548c141fd70535..1b3523deb1b23e 100644
--- a/libc/config/linux/app.h
+++ b/libc/config/linux/app.h
@@ -119,6 +119,9 @@ void init_tls(TLSDescriptor &tls);
 // Cleanup the TLS area as described in |tls_descriptor|.
 void cleanup_tls(uintptr_t tls_addr, uintptr_t tls_size);
 
+// Set the thread pointer for the current thread.
+bool set_thread_ptr(uintptr_t val);
+
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_CONFIG_LINUX_APP_H

diff  --git a/libc/startup/linux/CMakeLists.txt b/libc/startup/linux/CMakeLists.txt
index 2d55a365669718..39bcca9cdba9fe 100644
--- a/libc/startup/linux/CMakeLists.txt
+++ b/libc/startup/linux/CMakeLists.txt
@@ -84,10 +84,33 @@ endif()
 
 add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
 
+add_object_library(
+  do_start
+  SRCS
+    do_start.cpp
+  HDRS
+    do_start.h
+  DEPENDS
+    libc.config.linux.app_h
+    libc.include.sys_mman
+    libc.include.sys_syscall
+    libc.src.__support.threads.thread
+    libc.src.__support.OSUtil.osutil
+    libc.src.stdlib.exit
+    libc.src.stdlib.atexit
+    libc.src.unistd.environ
+  COMPILE_OPTIONS
+    -ffreestanding       # To avoid compiler warnings about calling the main function.
+    -fno-builtin         # avoid emit unexpected calls
+    -fno-stack-protector # stack protect canary is not available yet.
+)
+
 # TODO: factor out crt1 into multiple objects
 merge_relocatable_object(
   crt1
-  .${LIBC_TARGET_ARCHITECTURE}.crt1
+  .${LIBC_TARGET_ARCHITECTURE}.start
+  .${LIBC_TARGET_ARCHITECTURE}.tls
+  .do_start
 )
 
 add_startup_object(

diff  --git a/libc/startup/linux/aarch64/CMakeLists.txt b/libc/startup/linux/aarch64/CMakeLists.txt
index b47db8eb5d23f3..5ea6ae59abcb28 100644
--- a/libc/startup/linux/aarch64/CMakeLists.txt
+++ b/libc/startup/linux/aarch64/CMakeLists.txt
@@ -1,17 +1,24 @@
 add_startup_object(
-  crt1
+  tls
   SRC
-    start.cpp
+    tls.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
     libc.include.sys_syscall
-    libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.exit
-    libc.src.stdlib.atexit
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.unistd.environ
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer
+    -ffreestanding # To avoid compiler warnings about calling the main function.
+)
+
+add_startup_object(
+  start
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
   COMPILE_OPTIONS
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.

diff  --git a/libc/startup/linux/aarch64/start.cpp b/libc/startup/linux/aarch64/start.cpp
index bc01582aeb49c7..d0a85268733903 100644
--- a/libc/startup/linux/aarch64/start.cpp
+++ b/libc/startup/linux/aarch64/start.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of crt for aarch64 ---------------------------------===//
+//===-- Implementation of _start for aarch64 ------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,213 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "config/linux/app.h"
-#include "src/__support/OSUtil/syscall.h"
-#include "src/__support/threads/thread.h"
-#include "src/stdlib/atexit.h"
-#include "src/stdlib/exit.h"
-#include "src/string/memory_utils/inline_memcpy.h"
-
-#include <arm_acle.h>
-
-#include <linux/auxvec.h>
-#include <linux/elf.h>
-#include <stdint.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" int main(int, char **, char **);
-
-// Source documentation:
-// https://github.com/ARM-software/abi-aa/tree/main/sysvabi64
-
-namespace LIBC_NAMESPACE {
-
-#ifdef SYS_mmap2
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
-#elif SYS_mmap
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
-#else
-#error "mmap and mmap2 syscalls not available."
-#endif
-
-AppProperties app;
-
-static ThreadAttributes main_thread_attrib;
-
-void init_tls(TLSDescriptor &tls_descriptor) {
-  if (app.tls.size == 0) {
-    tls_descriptor.size = 0;
-    tls_descriptor.tp = 0;
-    return;
-  }
-
-  // aarch64 follows the variant 1 TLS layout:
-  //
-  // 1. First entry is the dynamic thread vector pointer
-  // 2. Second entry is a 8-byte reserved word.
-  // 3. Padding for alignment.
-  // 4. The TLS data from the ELF image.
-  //
-  // The thread pointer points to the first entry.
-
-  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
-  uintptr_t padding = 0;
-  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
-  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
-  if (diff != 0)
-    padding += (ALIGNMENT_MASK - diff) + 1;
-
-  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
-
-  // We cannot call the mmap function here as the functions set errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long mmap_ret_val = LIBC_NAMESPACE::syscall_impl<long>(
-      MMAP_SYSCALL_NUMBER, nullptr, alloc_size, PROT_READ | PROT_WRITE,
-      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  // We cannot check the return value with MAP_FAILED as that is the return
-  // of the mmap function and not the mmap syscall.
-  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
-  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
-  LIBC_NAMESPACE::inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                                reinterpret_cast<const char *>(app.tls.address),
-                                app.tls.init_size);
-  tls_descriptor.size = alloc_size;
-  tls_descriptor.addr = thread_ptr;
-  tls_descriptor.tp = thread_ptr;
-}
-
-void cleanup_tls(uintptr_t addr, uintptr_t size) {
-  if (size == 0)
-    return;
-  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-static void set_thread_ptr(uintptr_t val) { __arm_wsr64("tpidr_el0", val); }
-
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
-extern "C" {
-// These arrays are present in the .init_array and .fini_array sections.
-// The symbols are inserted by linker when it sees references to them.
-extern uintptr_t __preinit_array_start[];
-extern uintptr_t __preinit_array_end[];
-extern uintptr_t __init_array_start[];
-extern uintptr_t __init_array_end[];
-extern uintptr_t __fini_array_start[];
-extern uintptr_t __fini_array_end[];
-}
-
-static void call_init_array_callbacks(int argc, char **argv, char **env) {
-  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
-  for (size_t i = 0; i < preinit_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
-  size_t init_array_size = __init_array_end - __init_array_start;
-  for (size_t i = 0; i < init_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
-}
-
-static void call_fini_array_callbacks() {
-  size_t fini_array_size = __fini_array_end - __fini_array_start;
-  for (size_t i = fini_array_size; i > 0; --i)
-    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
-}
-
-} // namespace LIBC_NAMESPACE
-
-using LIBC_NAMESPACE::app;
-using LIBC_NAMESPACE::AuxEntry;
-
-__attribute__((noinline)) static void do_start() {
-  auto tid = LIBC_NAMESPACE::syscall_impl<long>(SYS_gettid);
-  if (tid <= 0)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  LIBC_NAMESPACE::main_thread_attrib.tid = static_cast<int>(tid);
-
-  // After the argv array, is a 8-byte long NULL value before the array of env
-  // values. The end of the env values is marked by another 8-byte long NULL
-  // value. We step over it (the "+ 1" below) to get to the env values.
-  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
-  uint64_t *env_end_marker = env_ptr;
-  app.env_ptr = env_ptr;
-  while (*env_end_marker)
-    ++env_end_marker;
-
-  // Initialize the POSIX global declared in unistd.h
-  environ = reinterpret_cast<char **>(env_ptr);
-
-  // After the env array, is the aux-vector. The end of the aux-vector is
-  // denoted by an AT_NULL entry.
-  Elf64_Phdr *program_hdr_table = nullptr;
-  uintptr_t program_hdr_count;
-  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
-  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
-    switch (aux_entry->id) {
-    case AT_PHDR:
-      program_hdr_table = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
-      break;
-    case AT_PHNUM:
-      program_hdr_count = aux_entry->value;
-      break;
-    case AT_PAGESZ:
-      app.page_size = aux_entry->value;
-      break;
-    default:
-      break; // TODO: Read other useful entries from the aux vector.
-    }
-  }
-
-  app.tls.size = 0;
-  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
-    Elf64_Phdr *phdr = program_hdr_table + i;
-    if (phdr->p_type != PT_TLS)
-      continue;
-    // TODO: p_vaddr value has to be adjusted for static-pie executables.
-    app.tls.address = phdr->p_vaddr;
-    app.tls.size = phdr->p_memsz;
-    app.tls.init_size = phdr->p_filesz;
-    app.tls.align = phdr->p_align;
-  }
-
-  // This descriptor has to be static since its cleanup function cannot
-  // capture the context.
-  static LIBC_NAMESPACE::TLSDescriptor tls;
-  LIBC_NAMESPACE::init_tls(tls);
-  if (tls.size != 0)
-    LIBC_NAMESPACE::set_thread_ptr(tls.tp);
-
-  LIBC_NAMESPACE::self.attrib = &LIBC_NAMESPACE::main_thread_attrib;
-  LIBC_NAMESPACE::main_thread_attrib.atexit_callback_mgr =
-      LIBC_NAMESPACE::internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
-  LIBC_NAMESPACE::atexit(
-      []() { LIBC_NAMESPACE::cleanup_tls(tls.tp, tls.size); });
-  // We want the fini array callbacks to be run after other atexit
-  // callbacks are run. So, we register them before running the init
-  // array callbacks as they can potentially register their own atexit
-  // callbacks.
-  LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
-
-  LIBC_NAMESPACE::call_init_array_callbacks(
-      static_cast<int>(app.args->argc),
-      reinterpret_cast<char **>(app.args->argv),
-      reinterpret_cast<char **>(env_ptr));
-
-  int retval = main(static_cast<int>(app.args->argc),
-                    reinterpret_cast<char **>(app.args->argv),
-                    reinterpret_cast<char **>(env_ptr));
-
-  LIBC_NAMESPACE::exit(retval);
-}
-
-extern "C" void _start() {
+#include "startup/linux/do_start.h"
+extern "C" [[noreturn]] void _start() {
   // Skip the Frame Pointer and the Link Register
   // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
   // Section 6.2.3. Note that this only works if the current function
@@ -223,7 +18,7 @@ extern "C" void _start() {
   // will take us to the previous stack pointer. That is the reason why the
   // actual business logic of the startup code is pushed into a non-inline
   // function do_start so that this function is free of any stack usage.
-  app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
+  LIBC_NAMESPACE::app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 2);
-  do_start();
+  LIBC_NAMESPACE::do_start();
 }

diff  --git a/libc/startup/linux/aarch64/tls.cpp b/libc/startup/linux/aarch64/tls.cpp
new file mode 100644
index 00000000000000..f2579e821b1bf2
--- /dev/null
+++ b/libc/startup/linux/aarch64/tls.cpp
@@ -0,0 +1,86 @@
+//===-- Implementation of tls for aarch64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
+
+#include <arm_acle.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+// Source documentation:
+// https://github.com/ARM-software/abi-aa/tree/main/sysvabi64
+
+namespace LIBC_NAMESPACE {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif SYS_mmap
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "mmap and mmap2 syscalls not available."
+#endif
+
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // aarch64 follows the variant 1 TLS layout:
+  //
+  // 1. First entry is the dynamic thread vector pointer
+  // 2. Second entry is a 8-byte reserved word.
+  // 3. Padding for alignment.
+  // 4. The TLS data from the ELF image.
+  //
+  // The thread pointer points to the first entry.
+
+  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
+  uintptr_t padding = 0;
+  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
+  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
+  if (diff != 0)
+    padding += (ALIGNMENT_MASK - diff) + 1;
+
+  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
+
+  // We cannot call the mmap function here as the functions set errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
+                                         alloc_size, PROT_READ | PROT_WRITE,
+                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
+    syscall_impl<long>(SYS_exit, 1);
+  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
+  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
+  tls_descriptor.size = alloc_size;
+  tls_descriptor.addr = thread_ptr;
+  tls_descriptor.tp = thread_ptr;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  syscall_impl<long>(SYS_munmap, addr, size);
+}
+
+bool set_thread_ptr(uintptr_t val) {
+  __arm_wsr64("tpidr_el0", val);
+  return true;
+}
+} // namespace LIBC_NAMESPACE

diff  --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
new file mode 100644
index 00000000000000..05dbd4488f5882
--- /dev/null
+++ b/libc/startup/linux/do_start.cpp
@@ -0,0 +1,140 @@
+//===-- Implementation file of do_start -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "startup/linux/do_start.h"
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/stdlib/atexit.h"
+#include "src/stdlib/exit.h"
+#include "src/unistd/environ.h"
+
+#include <linux/auxvec.h>
+#include <linux/elf.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+extern "C" int main(int argc, char **argv, char **envp);
+
+extern "C" {
+// These arrays are present in the .init_array and .fini_array sections.
+// The symbols are inserted by linker when it sees references to them.
+extern uintptr_t __preinit_array_start[];
+extern uintptr_t __preinit_array_end[];
+extern uintptr_t __init_array_start[];
+extern uintptr_t __init_array_end[];
+extern uintptr_t __fini_array_start[];
+extern uintptr_t __fini_array_end[];
+}
+
+namespace LIBC_NAMESPACE {
+// TODO: this symbol will be moved to config.linux.app
+AppProperties app;
+
+using InitCallback = void(int, char **, char **);
+using FiniCallback = void(void);
+
+static void call_init_array_callbacks(int argc, char **argv, char **env) {
+  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
+  for (size_t i = 0; i < preinit_array_size; ++i)
+    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
+  size_t init_array_size = __init_array_end - __init_array_start;
+  for (size_t i = 0; i < init_array_size; ++i)
+    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
+}
+
+static void call_fini_array_callbacks() {
+  size_t fini_array_size = __fini_array_end - __fini_array_start;
+  for (size_t i = fini_array_size; i > 0; --i)
+    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+}
+
+static ThreadAttributes main_thread_attrib;
+
+[[noreturn]] void do_start() {
+  auto tid = syscall_impl<long>(SYS_gettid);
+  if (tid <= 0)
+    syscall_impl<long>(SYS_exit, 1);
+  main_thread_attrib.tid = static_cast<int>(tid);
+
+  // After the argv array, is a 8-byte long NULL value before the array of env
+  // values. The end of the env values is marked by another 8-byte long NULL
+  // value. We step over it (the "+ 1" below) to get to the env values.
+  ArgVEntryType *env_ptr = app.args->argv + app.args->argc + 1;
+  ArgVEntryType *env_end_marker = env_ptr;
+  app.env_ptr = env_ptr;
+  while (*env_end_marker)
+    ++env_end_marker;
+
+  // Initialize the POSIX global declared in unistd.h
+  environ = reinterpret_cast<char **>(env_ptr);
+
+  // After the env array, is the aux-vector. The end of the aux-vector is
+  // denoted by an AT_NULL entry.
+  Elf64_Phdr *program_hdr_table = nullptr;
+  uintptr_t program_hdr_count = 0;
+  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
+  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
+    switch (aux_entry->id) {
+    case AT_PHDR:
+      program_hdr_table = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
+      break;
+    case AT_PHNUM:
+      program_hdr_count = aux_entry->value;
+      break;
+    case AT_PAGESZ:
+      app.page_size = aux_entry->value;
+      break;
+    default:
+      break; // TODO: Read other useful entries from the aux vector.
+    }
+  }
+
+  app.tls.size = 0;
+  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
+    Elf64_Phdr *phdr = program_hdr_table + i;
+    if (phdr->p_type != PT_TLS)
+      continue;
+    // TODO: p_vaddr value has to be adjusted for static-pie executables.
+    app.tls.address = phdr->p_vaddr;
+    app.tls.size = phdr->p_memsz;
+    app.tls.init_size = phdr->p_filesz;
+    app.tls.align = phdr->p_align;
+  }
+
+  // This descriptor has to be static since its cleanup function cannot
+  // capture the context.
+  static TLSDescriptor tls;
+  init_tls(tls);
+  if (tls.size != 0 && !set_thread_ptr(tls.tp))
+    syscall_impl<long>(SYS_exit, 1);
+
+  self.attrib = &main_thread_attrib;
+  main_thread_attrib.atexit_callback_mgr =
+      internal::get_thread_atexit_callback_mgr();
+  // We register the cleanup_tls function to be the last atexit callback to be
+  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
+  // as the stack protector canary).
+  atexit([]() { cleanup_tls(tls.tp, tls.size); });
+  // We want the fini array callbacks to be run after other atexit
+  // callbacks are run. So, we register them before running the init
+  // array callbacks as they can potentially register their own atexit
+  // callbacks.
+  atexit(&call_fini_array_callbacks);
+
+  call_init_array_callbacks(static_cast<int>(app.args->argc),
+                            reinterpret_cast<char **>(app.args->argv),
+                            reinterpret_cast<char **>(env_ptr));
+
+  int retval = main(static_cast<int>(app.args->argc),
+                    reinterpret_cast<char **>(app.args->argv),
+                    reinterpret_cast<char **>(env_ptr));
+
+  exit(retval);
+}
+
+} // namespace LIBC_NAMESPACE

diff  --git a/libc/startup/linux/do_start.h b/libc/startup/linux/do_start.h
new file mode 100644
index 00000000000000..a0e7a3cd695627
--- /dev/null
+++ b/libc/startup/linux/do_start.h
@@ -0,0 +1,14 @@
+//===-- Header file of do_start -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "config/linux/app.h"
+
+namespace LIBC_NAMESPACE {
+// setup the libc runtime and invoke the main routine.
+[[noreturn]] void do_start();
+} // namespace LIBC_NAMESPACE

diff  --git a/libc/startup/linux/riscv/CMakeLists.txt b/libc/startup/linux/riscv/CMakeLists.txt
index b47db8eb5d23f3..3717784233c151 100644
--- a/libc/startup/linux/riscv/CMakeLists.txt
+++ b/libc/startup/linux/riscv/CMakeLists.txt
@@ -1,17 +1,25 @@
 add_startup_object(
-  crt1
+  tls
   SRC
-    start.cpp
+    tls.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
     libc.include.sys_syscall
-    libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.exit
-    libc.src.stdlib.atexit
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.unistd.environ
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer
+    -ffreestanding # To avoid compiler warnings about calling the main function.
+)
+
+add_startup_object(
+  start
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
+    libc.src.__support.macros.attributes
   COMPILE_OPTIONS
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.

diff  --git a/libc/startup/linux/riscv/start.cpp b/libc/startup/linux/riscv/start.cpp
index 5b6e5bde8da81d..389f71a66d30ac 100644
--- a/libc/startup/linux/riscv/start.cpp
+++ b/libc/startup/linux/riscv/start.cpp
@@ -1,223 +1,20 @@
-//===-- Implementation of crt for riscv64 ---------------------------------===//
+//===-- Implementation of _start for riscv --------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
-#include "config/linux/app.h"
-#include "src/__support/OSUtil/syscall.h"
-#include "src/__support/threads/thread.h"
-#include "src/stdlib/atexit.h"
-#include "src/stdlib/exit.h"
-#include "src/string/memory_utils/inline_memcpy.h"
-
-#include <linux/auxvec.h>
-#include <linux/elf.h>
-#include <stdint.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" int main(int, char **, char **);
-
-namespace LIBC_NAMESPACE {
-
-#ifdef SYS_mmap2
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
-#elif SYS_mmap
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
-#else
-#error "mmap and mmap2 syscalls not available."
-#endif
-
-AppProperties app;
-
-static ThreadAttributes main_thread_attrib;
-
-void init_tls(TLSDescriptor &tls_descriptor) {
-  if (app.tls.size == 0) {
-    tls_descriptor.size = 0;
-    tls_descriptor.tp = 0;
-    return;
-  }
-
-  // riscv64 follows the variant 1 TLS layout:
-  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
-  uintptr_t padding = 0;
-  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
-  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
-  if (diff != 0)
-    padding += (ALIGNMENT_MASK - diff) + 1;
-
-  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
-
-  // We cannot call the mmap function here as the functions set errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long mmap_ret_val = LIBC_NAMESPACE::syscall_impl<long>(
-      MMAP_SYSCALL_NUMBER, nullptr, alloc_size, PROT_READ | PROT_WRITE,
-      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  // We cannot check the return value with MAP_FAILED as that is the return
-  // of the mmap function and not the mmap syscall.
-  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
-  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
-  LIBC_NAMESPACE::inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                                reinterpret_cast<const char *>(app.tls.address),
-                                app.tls.init_size);
-  tls_descriptor.size = alloc_size;
-  tls_descriptor.addr = thread_ptr;
-  tls_descriptor.tp = tls_addr;
-}
-
-void cleanup_tls(uintptr_t addr, uintptr_t size) {
-  if (size == 0)
-    return;
-  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-static void set_thread_ptr(uintptr_t val) {
-  LIBC_INLINE_ASM("mv tp, %0\n\t" : : "r"(val));
-}
-
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
-extern "C" {
-// These arrays are present in the .init_array and .fini_array sections.
-// The symbols are inserted by linker when it sees references to them.
-extern uintptr_t __preinit_array_start[];
-extern uintptr_t __preinit_array_end[];
-extern uintptr_t __init_array_start[];
-extern uintptr_t __init_array_end[];
-extern uintptr_t __fini_array_start[];
-extern uintptr_t __fini_array_end[];
-}
-
-static void call_init_array_callbacks(int argc, char **argv, char **env) {
-  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
-  for (size_t i = 0; i < preinit_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
-  size_t init_array_size = __init_array_end - __init_array_start;
-  for (size_t i = 0; i < init_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
-}
-
-static void call_fini_array_callbacks() {
-  size_t fini_array_size = __fini_array_end - __fini_array_start;
-  for (size_t i = fini_array_size; i > 0; --i)
-    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
-}
-
-} // namespace LIBC_NAMESPACE
-
-using LIBC_NAMESPACE::app;
-using LIBC_NAMESPACE::AuxEntry;
-
-#if defined(LIBC_TARGET_ARCH_IS_X86_64) ||                                     \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                    \
-    defined(LIBC_TARGET_ARCH_IS_RISCV64)
-typedef Elf64_Phdr PgrHdrTableType;
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
-typedef Elf32_Phdr PgrHdrTableType;
-#else
-#error "Program header table type is not defined for the target platform."
-#endif
-
-__attribute__((noinline)) static void do_start() {
-  LIBC_INLINE_ASM(".option push\n\t"
-                  ".option norelax\n\t"
-                  "lla gp, __global_pointer$\n\t"
-                  ".option pop\n\t");
-  auto tid = LIBC_NAMESPACE::syscall_impl<long>(SYS_gettid);
-  if (tid <= 0)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  LIBC_NAMESPACE::main_thread_attrib.tid = static_cast<int>(tid);
-
-  // After the argv array, is a 8-byte long NULL value before the array of env
-  // values. The end of the env values is marked by another 8-byte long NULL
-  // value. We step over it (the "+ 1" below) to get to the env values.
-  LIBC_NAMESPACE::ArgVEntryType *env_ptr = app.args->argv + app.args->argc + 1;
-  LIBC_NAMESPACE::ArgVEntryType *env_end_marker = env_ptr;
-  app.env_ptr = env_ptr;
-  while (*env_end_marker)
-    ++env_end_marker;
-
-  // Initialize the POSIX global declared in unistd.h
-  environ = reinterpret_cast<char **>(env_ptr);
-
-  // After the env array, is the aux-vector. The end of the aux-vector is
-  // denoted by an AT_NULL entry.
-  PgrHdrTableType *program_hdr_table = nullptr;
-  uintptr_t program_hdr_count;
-  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
-  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
-    switch (aux_entry->id) {
-    case AT_PHDR:
-      program_hdr_table = reinterpret_cast<PgrHdrTableType *>(aux_entry->value);
-      break;
-    case AT_PHNUM:
-      program_hdr_count = aux_entry->value;
-      break;
-    case AT_PAGESZ:
-      app.page_size = aux_entry->value;
-      break;
-    default:
-      break; // TODO: Read other useful entries from the aux vector.
-    }
-  }
-
-  app.tls.size = 0;
-  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
-    PgrHdrTableType *phdr = program_hdr_table + i;
-    if (phdr->p_type != PT_TLS)
-      continue;
-    // TODO: p_vaddr value has to be adjusted for static-pie executables.
-    app.tls.address = phdr->p_vaddr;
-    app.tls.size = phdr->p_memsz;
-    app.tls.init_size = phdr->p_filesz;
-    app.tls.align = phdr->p_align;
-  }
-
-  // This descriptor has to be static since its cleanup function cannot
-  // capture the context.
-  static LIBC_NAMESPACE::TLSDescriptor tls;
-  LIBC_NAMESPACE::init_tls(tls);
-  if (tls.size != 0)
-    LIBC_NAMESPACE::set_thread_ptr(tls.tp);
-
-  LIBC_NAMESPACE::self.attrib = &LIBC_NAMESPACE::main_thread_attrib;
-  LIBC_NAMESPACE::main_thread_attrib.atexit_callback_mgr =
-      LIBC_NAMESPACE::internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
-  LIBC_NAMESPACE::atexit(
-      []() { LIBC_NAMESPACE::cleanup_tls(tls.tp, tls.size); });
-  // We want the fini array callbacks to be run after other atexit
-  // callbacks are run. So, we register them before running the init
-  // array callbacks as they can potentially register their own atexit
-  // callbacks.
-  LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
-
-  LIBC_NAMESPACE::call_init_array_callbacks(
-      static_cast<int>(app.args->argc),
-      reinterpret_cast<char **>(app.args->argv),
-      reinterpret_cast<char **>(env_ptr));
-
-  int retval = main(static_cast<int>(app.args->argc),
-                    reinterpret_cast<char **>(app.args->argv),
-                    reinterpret_cast<char **>(env_ptr));
-
-  LIBC_NAMESPACE::exit(retval);
-}
-
-extern "C" void _start() {
+#include "src/__support/macros/attributes.h"
+#include "startup/linux/do_start.h"
+
+extern "C" [[noreturn]] void _start() {
+  asm volatile(".option push\n\t"
+               ".option norelax\n\t"
+               "lla gp, __global_pointer$\n\t"
+               ".option pop\n\t");
   // Fetch the args using the frame pointer.
-  app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
+  LIBC_NAMESPACE::app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)));
-  do_start();
+  LIBC_NAMESPACE::do_start();
 }

diff  --git a/libc/startup/linux/riscv/tls.cpp b/libc/startup/linux/riscv/tls.cpp
new file mode 100644
index 00000000000000..997912c77e7377
--- /dev/null
+++ b/libc/startup/linux/riscv/tls.cpp
@@ -0,0 +1,74 @@
+//===-- Implementation of tls for riscv -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
+
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif SYS_mmap
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "mmap and mmap2 syscalls not available."
+#endif
+
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // riscv64 follows the variant 1 TLS layout:
+  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
+  uintptr_t padding = 0;
+  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
+  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
+  if (diff != 0)
+    padding += (ALIGNMENT_MASK - diff) + 1;
+
+  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
+
+  // We cannot call the mmap function here as the functions set errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
+                                         alloc_size, PROT_READ | PROT_WRITE,
+                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
+    syscall_impl<long>(SYS_exit, 1);
+  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
+  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
+  tls_descriptor.size = alloc_size;
+  tls_descriptor.addr = thread_ptr;
+  tls_descriptor.tp = tls_addr;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  syscall_impl<long>(SYS_munmap, addr, size);
+}
+
+bool set_thread_ptr(uintptr_t val) {
+  LIBC_INLINE_ASM("mv tp, %0\n\t" : : "r"(val));
+  return true;
+}
+} // namespace LIBC_NAMESPACE

diff  --git a/libc/startup/linux/x86_64/CMakeLists.txt b/libc/startup/linux/x86_64/CMakeLists.txt
index aac5a0626a176a..30da7ab4e1ec3d 100644
--- a/libc/startup/linux/x86_64/CMakeLists.txt
+++ b/libc/startup/linux/x86_64/CMakeLists.txt
@@ -1,22 +1,30 @@
 add_startup_object(
-  crt1
+  tls
   SRC
-    start.cpp
+    tls.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
     libc.include.sys_syscall
-    libc.include.unistd
-    libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.exit
-    libc.src.stdlib.abort
-    libc.src.stdlib.atexit
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.unistd.environ
   COMPILE_OPTIONS
     -fno-stack-protector
     -fno-omit-frame-pointer
-    -ffreestanding # To avoid compiler warnings about calling the main function.
+    -ffreestanding
+    -fno-builtin
+)
+
+add_startup_object(
+  start
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
+    libc.src.__support.macros.attributes
+  COMPILE_OPTIONS
+    -fno-stack-protector
+    -fno-omit-frame-pointer
+    -ffreestanding
     -fno-builtin
 )

diff  --git a/libc/startup/linux/x86_64/start.cpp b/libc/startup/linux/x86_64/start.cpp
index bc03a3cb1de27f..25da25a496daa5 100644
--- a/libc/startup/linux/x86_64/start.cpp
+++ b/libc/startup/linux/x86_64/start.cpp
@@ -1,151 +1,18 @@
-//===-- Implementation of crt for x86_64 ----------------------------------===//
+//===-- Implementation of _start for x86_64 -------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
-#include "config/linux/app.h"
-#include "src/__support/OSUtil/io.h"
-#include "src/__support/OSUtil/syscall.h"
-#include "src/__support/threads/thread.h"
-#include "src/stdlib/abort.h"
-#include "src/stdlib/atexit.h"
-#include "src/stdlib/exit.h"
-#include "src/string/memory_utils/inline_memcpy.h"
-
-#include <asm/prctl.h>
-#include <linux/auxvec.h>
-#include <linux/elf.h>
-#include <stdint.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" int main(int, char **, char **);
-
-namespace LIBC_NAMESPACE {
-
-#ifdef SYS_mmap2
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
-#elif SYS_mmap
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
-#else
-#error "mmap and mmap2 syscalls not available."
-#endif
-
-AppProperties app;
-
-static ThreadAttributes main_thread_attrib;
-
-// TODO: The function is x86_64 specific. Move it to config/linux/app.h
-// and generalize it. Also, dynamic loading is not handled currently.
-void init_tls(TLSDescriptor &tls_descriptor) {
-  if (app.tls.size == 0) {
-    tls_descriptor.size = 0;
-    tls_descriptor.tp = 0;
-    return;
-  }
-
-  // We will assume the alignment is always a power of two.
-  uintptr_t tls_size = app.tls.size & -app.tls.align;
-  if (tls_size != app.tls.size)
-    tls_size += app.tls.align;
-
-  // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the
-  // address of the TLS block. So, we add more size to accomodate this address
-  // entry.
-  // We also need to include space for the stack canary. The canary is at
-  // offset 0x28 (40) and is of size uintptr_t.
-  uintptr_t tls_size_with_addr = tls_size + sizeof(uintptr_t) + 40;
-
-  // We cannot call the mmap function here as the functions set errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long mmap_retval = LIBC_NAMESPACE::syscall_impl<long>(
-      MMAP_SYSCALL_NUMBER, nullptr, tls_size_with_addr, PROT_READ | PROT_WRITE,
-      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  // We cannot check the return value with MAP_FAILED as that is the return
-  // of the mmap function and not the mmap syscall.
-  if (mmap_retval < 0 && static_cast<uintptr_t>(mmap_retval) > -app.page_size)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  uintptr_t *tls_addr = reinterpret_cast<uintptr_t *>(mmap_retval);
-
-  // x86_64 TLS faces down from the thread pointer with the first entry
-  // pointing to the address of the first real TLS byte.
-  uintptr_t end_ptr = reinterpret_cast<uintptr_t>(tls_addr) + tls_size;
-  *reinterpret_cast<uintptr_t *>(end_ptr) = end_ptr;
-
-  LIBC_NAMESPACE::inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                                reinterpret_cast<const char *>(app.tls.address),
-                                app.tls.init_size);
-  uintptr_t *stack_guard_addr = reinterpret_cast<uintptr_t *>(end_ptr + 40);
-  // Setting the stack guard to a random value.
-  // We cannot call the get_random function here as the function sets errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  ssize_t stack_guard_retval = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-      SYS_getrandom, reinterpret_cast<long>(stack_guard_addr), sizeof(uint64_t),
-      0);
-  if (stack_guard_retval < 0)
-    LIBC_NAMESPACE::syscall_impl(SYS_exit, 1);
-
-  tls_descriptor = {tls_size_with_addr, reinterpret_cast<uintptr_t>(tls_addr),
-                    end_ptr};
-  return;
-}
-
-void cleanup_tls(uintptr_t addr, uintptr_t size) {
-  if (size == 0)
-    return;
-  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-// Sets the thread pointer to |val|. Returns true on success, false on failure.
-static bool set_thread_ptr(uintptr_t val) {
-  return LIBC_NAMESPACE::syscall_impl(SYS_arch_prctl, ARCH_SET_FS, val) != -1;
-}
-
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
-extern "C" {
-// These arrays are present in the .init_array and .fini_array sections.
-// The symbols are inserted by linker when it sees references to them.
-extern uintptr_t __preinit_array_start[];
-extern uintptr_t __preinit_array_end[];
-extern uintptr_t __init_array_start[];
-extern uintptr_t __init_array_end[];
-extern uintptr_t __fini_array_start[];
-extern uintptr_t __fini_array_end[];
-}
-
-static void call_init_array_callbacks(int argc, char **argv, char **env) {
-  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
-  for (size_t i = 0; i < preinit_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
-  size_t init_array_size = __init_array_end - __init_array_start;
-  for (size_t i = 0; i < init_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
-}
-
-static void call_fini_array_callbacks() {
-  size_t fini_array_size = __fini_array_end - __fini_array_start;
-  for (size_t i = fini_array_size; i > 0; --i)
-    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
-}
-
-} // namespace LIBC_NAMESPACE
-
-using LIBC_NAMESPACE::app;
-using LIBC_NAMESPACE::AuxEntry;
-
-extern "C" void _start() {
-  // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous value
-  // of the base pointer is pushed on to the stack. So, we step over it (the
-  // "+ 1" below) to get to the args.
-  app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
+#include "src/__support/macros/attributes.h"
+#include "startup/linux/do_start.h"
+
+extern "C" [[noreturn]] void _start() {
+  // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous
+  // value of the base pointer is pushed on to the stack. So, we step over
+  // it (the "+ 1" below) to get to the args.
+  LIBC_NAMESPACE::app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 1);
 
   // The x86_64 ABI requires that the stack pointer is aligned to a 16-byte
@@ -159,88 +26,8 @@ extern "C" void _start() {
   // compilers can generate code assuming the alignment as required by the ABI.
   // If the stack pointers as setup by the OS are already aligned, then the
   // following code is a NOP.
-  __asm__ __volatile__("andq $0xfffffffffffffff0, %rsp\n\t");
-  __asm__ __volatile__("andq $0xfffffffffffffff0, %rbp\n\t");
-
-  auto tid = LIBC_NAMESPACE::syscall_impl<long>(SYS_gettid);
-  if (tid <= 0)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  LIBC_NAMESPACE::main_thread_attrib.tid = static_cast<int>(tid);
-
-  // After the argv array, is a 8-byte long NULL value before the array of env
-  // values. The end of the env values is marked by another 8-byte long NULL
-  // value. We step over it (the "+ 1" below) to get to the env values.
-  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
-  uint64_t *env_end_marker = env_ptr;
-  app.env_ptr = env_ptr;
-  while (*env_end_marker)
-    ++env_end_marker;
-
-  // Initialize the POSIX global declared in unistd.h
-  environ = reinterpret_cast<char **>(env_ptr);
-
-  // After the env array, is the aux-vector. The end of the aux-vector is
-  // denoted by an AT_NULL entry.
-  Elf64_Phdr *program_hdr_table = nullptr;
-  uintptr_t program_hdr_count = 0;
-  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
-  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
-    switch (aux_entry->id) {
-    case AT_PHDR:
-      program_hdr_table = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
-      break;
-    case AT_PHNUM:
-      program_hdr_count = aux_entry->value;
-      break;
-    case AT_PAGESZ:
-      app.page_size = aux_entry->value;
-      break;
-    default:
-      break; // TODO: Read other useful entries from the aux vector.
-    }
-  }
-
-  app.tls.size = 0;
-  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
-    Elf64_Phdr *phdr = program_hdr_table + i;
-    if (phdr->p_type != PT_TLS)
-      continue;
-    // TODO: p_vaddr value has to be adjusted for static-pie executables.
-    app.tls.address = phdr->p_vaddr;
-    app.tls.size = phdr->p_memsz;
-    app.tls.init_size = phdr->p_filesz;
-    app.tls.align = phdr->p_align;
-  }
-
-  // This descriptor has to be static since its cleanup function cannot
-  // capture the context.
-  static LIBC_NAMESPACE::TLSDescriptor tls;
-  LIBC_NAMESPACE::init_tls(tls);
-  if (tls.size != 0 && !LIBC_NAMESPACE::set_thread_ptr(tls.tp))
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-
-  LIBC_NAMESPACE::self.attrib = &LIBC_NAMESPACE::main_thread_attrib;
-  LIBC_NAMESPACE::main_thread_attrib.atexit_callback_mgr =
-      LIBC_NAMESPACE::internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
-  LIBC_NAMESPACE::atexit(
-      []() { LIBC_NAMESPACE::cleanup_tls(tls.tp, tls.size); });
-  // We want the fini array callbacks to be run after other atexit
-  // callbacks are run. So, we register them before running the init
-  // array callbacks as they can potentially register their own atexit
-  // callbacks.
-  LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
-
-  LIBC_NAMESPACE::call_init_array_callbacks(
-      static_cast<int>(app.args->argc),
-      reinterpret_cast<char **>(app.args->argv),
-      reinterpret_cast<char **>(env_ptr));
-
-  int retval = main(static_cast<int>(app.args->argc),
-                    reinterpret_cast<char **>(app.args->argv),
-                    reinterpret_cast<char **>(env_ptr));
+  asm volatile("andq $0xfffffffffffffff0, %rsp\n\t");
+  asm volatile("andq $0xfffffffffffffff0, %rbp\n\t");
 
-  LIBC_NAMESPACE::exit(retval);
+  LIBC_NAMESPACE::do_start();
 }

diff  --git a/libc/startup/linux/x86_64/tls.cpp b/libc/startup/linux/x86_64/tls.cpp
new file mode 100644
index 00000000000000..8b0fa987362444
--- /dev/null
+++ b/libc/startup/linux/x86_64/tls.cpp
@@ -0,0 +1,93 @@
+//===-- Implementation of tls for x86_64 ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/syscall.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
+
+#include <asm/prctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif SYS_mmap
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "mmap and mmap2 syscalls not available."
+#endif
+
+// TODO: Also generalize this routine and handle dynamic loading properly.
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // We will assume the alignment is always a power of two.
+  uintptr_t tls_size = app.tls.size & -app.tls.align;
+  if (tls_size != app.tls.size)
+    tls_size += app.tls.align;
+
+  // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the
+  // address of the TLS block. So, we add more size to accomodate this address
+  // entry.
+  // We also need to include space for the stack canary. The canary is at
+  // offset 0x28 (40) and is of size uintptr_t.
+  uintptr_t tls_size_with_addr = tls_size + sizeof(uintptr_t) + 40;
+
+  // We cannot call the mmap function here as the functions set errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long mmap_retval = syscall_impl<long>(
+      MMAP_SYSCALL_NUMBER, nullptr, tls_size_with_addr, PROT_READ | PROT_WRITE,
+      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_retval < 0 && static_cast<uintptr_t>(mmap_retval) > -app.page_size)
+    syscall_impl<long>(SYS_exit, 1);
+  uintptr_t *tls_addr = reinterpret_cast<uintptr_t *>(mmap_retval);
+
+  // x86_64 TLS faces down from the thread pointer with the first entry
+  // pointing to the address of the first real TLS byte.
+  uintptr_t end_ptr = reinterpret_cast<uintptr_t>(tls_addr) + tls_size;
+  *reinterpret_cast<uintptr_t *>(end_ptr) = end_ptr;
+
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
+  uintptr_t *stack_guard_addr = reinterpret_cast<uintptr_t *>(end_ptr + 40);
+  // Setting the stack guard to a random value.
+  // We cannot call the get_random function here as the function sets errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long stack_guard_retval =
+      syscall_impl(SYS_getrandom, reinterpret_cast<long>(stack_guard_addr),
+                   sizeof(uint64_t), 0);
+  if (stack_guard_retval < 0)
+    syscall_impl(SYS_exit, 1);
+
+  tls_descriptor = {tls_size_with_addr, reinterpret_cast<uintptr_t>(tls_addr),
+                    end_ptr};
+  return;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  syscall_impl<long>(SYS_munmap, addr, size);
+}
+
+// Sets the thread pointer to |val|. Returns true on success, false on failure.
+bool set_thread_ptr(uintptr_t val) {
+  return syscall_impl(SYS_arch_prctl, ARCH_SET_FS, val) != -1;
+}
+} // namespace LIBC_NAMESPACE


        


More information about the libc-commits mailing list