[libc-commits] [libc] [libc] major refactor of startup library (PR #76092)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Thu Jan 4 12:17:08 PST 2024


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/76092

>From 97dee47dc06f05bf83fd2c328917951dafa4c380 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 20 Dec 2023 13:51:16 -0500
Subject: [PATCH 1/5] [libc] major refactor of startup library

* separate initialization routines into _start and do_start for all
  architectures.
* lift do_start as a separate object library to avoid code duplication.
* (additionally) address the problem of building hermetic libc with -fstack-protector-*
---
 libc/cmake/modules/LLVMLibCTestRules.cmake |   6 +
 libc/config/linux/app.h                    |   3 +
 libc/startup/linux/CMakeLists.txt          |  24 ++-
 libc/startup/linux/aarch64/CMakeLists.txt  |   6 +-
 libc/startup/linux/aarch64/start.cpp       | 156 ++-----------------
 libc/startup/linux/do_start.cpp            | 141 +++++++++++++++++
 libc/startup/linux/do_start.h              |  14 ++
 libc/startup/linux/riscv/CMakeLists.txt    |   6 +-
 libc/startup/linux/riscv/start.cpp         | 166 ++-------------------
 libc/startup/linux/x86_64/CMakeLists.txt   |  10 +-
 libc/startup/linux/x86_64/start.cpp        | 165 +++-----------------
 11 files changed, 238 insertions(+), 459 deletions(-)
 create mode 100644 libc/startup/linux/do_start.cpp
 create mode 100644 libc/startup/linux/do_start.h

diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index 51d484b875aeff..b69839afebf8a1 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -498,6 +498,9 @@ function(add_integration_test test_name)
       libc.src.string.memcpy
       libc.src.string.memmove
       libc.src.string.memset
+      # __stack_chk_fail should always be included to allow building libc with
+      # stack protector.
+      libc.src.compiler.__stack_chk_fail
   )
   list(REMOVE_DUPLICATES fq_deps_list)
 
@@ -665,6 +668,9 @@ function(add_libc_hermetic_test test_name)
       libc.src.string.memmove
       libc.src.string.memset
       libc.src.__support.StringUtil.error_to_string
+      # __stack_chk_fail should always be included to allow building libc with
+      # stack protector.
+      libc.src.compiler.__stack_chk_fail
   )
 
   if(TARGET libc.src.time.clock)
diff --git a/libc/config/linux/app.h b/libc/config/linux/app.h
index 548c141fd70535..1b3523deb1b23e 100644
--- a/libc/config/linux/app.h
+++ b/libc/config/linux/app.h
@@ -119,6 +119,9 @@ void init_tls(TLSDescriptor &tls);
 // Cleanup the TLS area as described in |tls_descriptor|.
 void cleanup_tls(uintptr_t tls_addr, uintptr_t tls_size);
 
+// Set the thread pointer for the current thread.
+bool set_thread_ptr(uintptr_t val);
+
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_CONFIG_LINUX_APP_H
diff --git a/libc/startup/linux/CMakeLists.txt b/libc/startup/linux/CMakeLists.txt
index 2d55a365669718..e9bdc2dd4478e9 100644
--- a/libc/startup/linux/CMakeLists.txt
+++ b/libc/startup/linux/CMakeLists.txt
@@ -84,10 +84,32 @@ endif()
 
 add_subdirectory(${LIBC_TARGET_ARCHITECTURE})
 
+add_object_library(
+  do_start
+  SRCS
+    do_start.cpp
+  HDRS
+    do_start.h
+  DEPENDS
+    libc.config.linux.app_h
+    libc.include.sys_mman
+    libc.include.sys_syscall
+    libc.src.__support.threads.thread
+    libc.src.__support.OSUtil.osutil
+    libc.src.stdlib.exit
+    libc.src.stdlib.atexit
+    libc.src.unistd.environ
+  COMPILE_OPTIONS
+    -ffreestanding       # To avoid compiler warnings about calling the main function.
+    -fno-builtin         # avoid emitting unexpected calls
+    -fno-stack-protector # stack protector canary is not available yet.
+)
+
 # TODO: factor out crt1 into multiple objects
 merge_relocatable_object(
   crt1
-  .${LIBC_TARGET_ARCHITECTURE}.crt1
+  .${LIBC_TARGET_ARCHITECTURE}.start
+  .do_start
 )
 
 add_startup_object(
diff --git a/libc/startup/linux/aarch64/CMakeLists.txt b/libc/startup/linux/aarch64/CMakeLists.txt
index b47db8eb5d23f3..868b755d38f519 100644
--- a/libc/startup/linux/aarch64/CMakeLists.txt
+++ b/libc/startup/linux/aarch64/CMakeLists.txt
@@ -1,17 +1,13 @@
 add_startup_object(
-  crt1
+  start
   SRC
     start.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
     libc.include.sys_syscall
-    libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.exit
-    libc.src.stdlib.atexit
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.unistd.environ
   COMPILE_OPTIONS
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.
diff --git a/libc/startup/linux/aarch64/start.cpp b/libc/startup/linux/aarch64/start.cpp
index bc01582aeb49c7..0e1efdc1b04da9 100644
--- a/libc/startup/linux/aarch64/start.cpp
+++ b/libc/startup/linux/aarch64/start.cpp
@@ -6,23 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "config/linux/app.h"
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/threads/thread.h"
-#include "src/stdlib/atexit.h"
-#include "src/stdlib/exit.h"
 #include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
 
 #include <arm_acle.h>
-
-#include <linux/auxvec.h>
-#include <linux/elf.h>
-#include <stdint.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" int main(int, char **, char **);
 
 // Source documentation:
 // https://github.com/ARM-software/abi-aa/tree/main/sysvabi64
@@ -37,10 +28,6 @@ static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
 #error "mmap and mmap2 syscalls not available."
 #endif
 
-AppProperties app;
-
-static ThreadAttributes main_thread_attrib;
-
 void init_tls(TLSDescriptor &tls_descriptor) {
   if (app.tls.size == 0) {
     tls_descriptor.size = 0;
@@ -69,18 +56,18 @@ void init_tls(TLSDescriptor &tls_descriptor) {
   // We cannot call the mmap function here as the functions set errno on
   // failure. Since errno is implemented via a thread local variable, we cannot
   // use errno before TLS is setup.
-  long mmap_ret_val = LIBC_NAMESPACE::syscall_impl<long>(
-      MMAP_SYSCALL_NUMBER, nullptr, alloc_size, PROT_READ | PROT_WRITE,
-      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
+                                         alloc_size, PROT_READ | PROT_WRITE,
+                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
   // We cannot check the return value with MAP_FAILED as that is the return
   // of the mmap function and not the mmap syscall.
   if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
+    syscall_impl<long>(SYS_exit, 1);
   uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
   uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
-  LIBC_NAMESPACE::inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                                reinterpret_cast<const char *>(app.tls.address),
-                                app.tls.init_size);
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
   tls_descriptor.size = alloc_size;
   tls_descriptor.addr = thread_ptr;
   tls_descriptor.tp = thread_ptr;
@@ -89,127 +76,12 @@ void init_tls(TLSDescriptor &tls_descriptor) {
 void cleanup_tls(uintptr_t addr, uintptr_t size) {
   if (size == 0)
     return;
-  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-static void set_thread_ptr(uintptr_t val) { __arm_wsr64("tpidr_el0", val); }
-
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
-extern "C" {
-// These arrays are present in the .init_array and .fini_array sections.
-// The symbols are inserted by linker when it sees references to them.
-extern uintptr_t __preinit_array_start[];
-extern uintptr_t __preinit_array_end[];
-extern uintptr_t __init_array_start[];
-extern uintptr_t __init_array_end[];
-extern uintptr_t __fini_array_start[];
-extern uintptr_t __fini_array_end[];
-}
-
-static void call_init_array_callbacks(int argc, char **argv, char **env) {
-  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
-  for (size_t i = 0; i < preinit_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
-  size_t init_array_size = __init_array_end - __init_array_start;
-  for (size_t i = 0; i < init_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
+  syscall_impl<long>(SYS_munmap, addr, size);
 }
 
-static void call_fini_array_callbacks() {
-  size_t fini_array_size = __fini_array_end - __fini_array_start;
-  for (size_t i = fini_array_size; i > 0; --i)
-    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
-}
-
-} // namespace LIBC_NAMESPACE
-
-using LIBC_NAMESPACE::app;
-using LIBC_NAMESPACE::AuxEntry;
-
-__attribute__((noinline)) static void do_start() {
-  auto tid = LIBC_NAMESPACE::syscall_impl<long>(SYS_gettid);
-  if (tid <= 0)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  LIBC_NAMESPACE::main_thread_attrib.tid = static_cast<int>(tid);
-
-  // After the argv array, is a 8-byte long NULL value before the array of env
-  // values. The end of the env values is marked by another 8-byte long NULL
-  // value. We step over it (the "+ 1" below) to get to the env values.
-  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
-  uint64_t *env_end_marker = env_ptr;
-  app.env_ptr = env_ptr;
-  while (*env_end_marker)
-    ++env_end_marker;
-
-  // Initialize the POSIX global declared in unistd.h
-  environ = reinterpret_cast<char **>(env_ptr);
-
-  // After the env array, is the aux-vector. The end of the aux-vector is
-  // denoted by an AT_NULL entry.
-  Elf64_Phdr *program_hdr_table = nullptr;
-  uintptr_t program_hdr_count;
-  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
-  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
-    switch (aux_entry->id) {
-    case AT_PHDR:
-      program_hdr_table = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
-      break;
-    case AT_PHNUM:
-      program_hdr_count = aux_entry->value;
-      break;
-    case AT_PAGESZ:
-      app.page_size = aux_entry->value;
-      break;
-    default:
-      break; // TODO: Read other useful entries from the aux vector.
-    }
-  }
-
-  app.tls.size = 0;
-  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
-    Elf64_Phdr *phdr = program_hdr_table + i;
-    if (phdr->p_type != PT_TLS)
-      continue;
-    // TODO: p_vaddr value has to be adjusted for static-pie executables.
-    app.tls.address = phdr->p_vaddr;
-    app.tls.size = phdr->p_memsz;
-    app.tls.init_size = phdr->p_filesz;
-    app.tls.align = phdr->p_align;
-  }
-
-  // This descriptor has to be static since its cleanup function cannot
-  // capture the context.
-  static LIBC_NAMESPACE::TLSDescriptor tls;
-  LIBC_NAMESPACE::init_tls(tls);
-  if (tls.size != 0)
-    LIBC_NAMESPACE::set_thread_ptr(tls.tp);
-
-  LIBC_NAMESPACE::self.attrib = &LIBC_NAMESPACE::main_thread_attrib;
-  LIBC_NAMESPACE::main_thread_attrib.atexit_callback_mgr =
-      LIBC_NAMESPACE::internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
-  LIBC_NAMESPACE::atexit(
-      []() { LIBC_NAMESPACE::cleanup_tls(tls.tp, tls.size); });
-  // We want the fini array callbacks to be run after other atexit
-  // callbacks are run. So, we register them before running the init
-  // array callbacks as they can potentially register their own atexit
-  // callbacks.
-  LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
-
-  LIBC_NAMESPACE::call_init_array_callbacks(
-      static_cast<int>(app.args->argc),
-      reinterpret_cast<char **>(app.args->argv),
-      reinterpret_cast<char **>(env_ptr));
-
-  int retval = main(static_cast<int>(app.args->argc),
-                    reinterpret_cast<char **>(app.args->argv),
-                    reinterpret_cast<char **>(env_ptr));
-
-  LIBC_NAMESPACE::exit(retval);
+bool set_thread_ptr(uintptr_t val) {
+  __arm_wsr64("tpidr_el0", val);
+  return true;
 }
 
 extern "C" void _start() {
@@ -223,7 +95,9 @@ extern "C" void _start() {
   // will take us to the previous stack pointer. That is the reason why the
   // actual business logic of the startup code is pushed into a non-inline
   // function do_start so that this function is free of any stack usage.
-  app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
+  app.args = reinterpret_cast<Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 2);
   do_start();
 }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
new file mode 100644
index 00000000000000..4a86bc385a7c8b
--- /dev/null
+++ b/libc/startup/linux/do_start.cpp
@@ -0,0 +1,141 @@
+//===-- Implementation file of do_start -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "startup/linux/do_start.h"
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/stdlib/atexit.h"
+#include "src/stdlib/exit.h"
+#include "src/unistd/environ.h"
+
+#include <linux/auxvec.h>
+#include <linux/elf.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE {
+
+// TODO: this symbol will be moved to config.linux.app
+AppProperties app;
+
+using InitCallback = void(int, char **, char **);
+using FiniCallback = void(void);
+
+extern "C" int main(int argc, char **argv, char **envp);
+
+extern "C" {
+// These arrays are present in the .init_array and .fini_array sections.
+// The symbols are inserted by linker when it sees references to them.
+extern uintptr_t __preinit_array_start[];
+extern uintptr_t __preinit_array_end[];
+extern uintptr_t __init_array_start[];
+extern uintptr_t __init_array_end[];
+extern uintptr_t __fini_array_start[];
+extern uintptr_t __fini_array_end[];
+}
+
+static void call_init_array_callbacks(int argc, char **argv, char **env) {
+  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
+  for (size_t i = 0; i < preinit_array_size; ++i)
+    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
+  size_t init_array_size = __init_array_end - __init_array_start;
+  for (size_t i = 0; i < init_array_size; ++i)
+    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
+}
+
+static void call_fini_array_callbacks() {
+  size_t fini_array_size = __fini_array_end - __fini_array_start;
+  for (size_t i = fini_array_size; i > 0; --i)
+    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+}
+
+static ThreadAttributes main_thread_attrib;
+
+[[noreturn]] void do_start() {
+  auto tid = syscall_impl<long>(SYS_gettid);
+  if (tid <= 0)
+    syscall_impl<long>(SYS_exit, 1);
+  main_thread_attrib.tid = static_cast<int>(tid);
+
+  // After the argv array, is an 8-byte long NULL value before the array of env
+  // values. The end of the env values is marked by another 8-byte long NULL
+  // value. We step over it (the "+ 1" below) to get to the env values.
+  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
+  uint64_t *env_end_marker = env_ptr;
+  app.env_ptr = env_ptr;
+  while (*env_end_marker)
+    ++env_end_marker;
+
+  // Initialize the POSIX global declared in unistd.h
+  environ = reinterpret_cast<char **>(env_ptr);
+
+  // After the env array, is the aux-vector. The end of the aux-vector is
+  // denoted by an AT_NULL entry.
+  Elf64_Phdr *program_hdr_table = nullptr;
+  uintptr_t program_hdr_count = 0;
+  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
+  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
+    switch (aux_entry->id) {
+    case AT_PHDR:
+      program_hdr_table = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
+      break;
+    case AT_PHNUM:
+      program_hdr_count = aux_entry->value;
+      break;
+    case AT_PAGESZ:
+      app.page_size = aux_entry->value;
+      break;
+    default:
+      break; // TODO: Read other useful entries from the aux vector.
+    }
+  }
+
+  app.tls.size = 0;
+  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
+    Elf64_Phdr *phdr = program_hdr_table + i;
+    if (phdr->p_type != PT_TLS)
+      continue;
+    // TODO: p_vaddr value has to be adjusted for static-pie executables.
+    app.tls.address = phdr->p_vaddr;
+    app.tls.size = phdr->p_memsz;
+    app.tls.init_size = phdr->p_filesz;
+    app.tls.align = phdr->p_align;
+  }
+
+  // This descriptor has to be static since its cleanup function cannot
+  // capture the context.
+  static TLSDescriptor tls;
+  init_tls(tls);
+  if (tls.size != 0 && !set_thread_ptr(tls.tp))
+    syscall_impl<long>(SYS_exit, 1);
+
+  self.attrib = &main_thread_attrib;
+  main_thread_attrib.atexit_callback_mgr =
+      internal::get_thread_atexit_callback_mgr();
+  // We register the cleanup_tls function to be the last atexit callback to be
+  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
+  // as the stack protector canary).
+  atexit([]() { cleanup_tls(tls.tp, tls.size); });
+  // We want the fini array callbacks to be run after other atexit
+  // callbacks are run. So, we register them before running the init
+  // array callbacks as they can potentially register their own atexit
+  // callbacks.
+  atexit(&call_fini_array_callbacks);
+
+  call_init_array_callbacks(static_cast<int>(app.args->argc),
+                            reinterpret_cast<char **>(app.args->argv),
+                            reinterpret_cast<char **>(env_ptr));
+
+  int retval = main(static_cast<int>(app.args->argc),
+                    reinterpret_cast<char **>(app.args->argv),
+                    reinterpret_cast<char **>(env_ptr));
+
+  exit(retval);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/do_start.h b/libc/startup/linux/do_start.h
new file mode 100644
index 00000000000000..a0e7a3cd695627
--- /dev/null
+++ b/libc/startup/linux/do_start.h
@@ -0,0 +1,14 @@
+//===-- Header file of do_start -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "config/linux/app.h"
+
+namespace LIBC_NAMESPACE {
+// Set up the libc runtime and invoke the main routine.
+[[noreturn]] void do_start();
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/riscv/CMakeLists.txt b/libc/startup/linux/riscv/CMakeLists.txt
index b47db8eb5d23f3..868b755d38f519 100644
--- a/libc/startup/linux/riscv/CMakeLists.txt
+++ b/libc/startup/linux/riscv/CMakeLists.txt
@@ -1,17 +1,13 @@
 add_startup_object(
-  crt1
+  start
   SRC
     start.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
     libc.include.sys_syscall
-    libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.exit
-    libc.src.stdlib.atexit
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.unistd.environ
   COMPILE_OPTIONS
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.
diff --git a/libc/startup/linux/riscv/start.cpp b/libc/startup/linux/riscv/start.cpp
index 5b6e5bde8da81d..57c83b502e9c94 100644
--- a/libc/startup/linux/riscv/start.cpp
+++ b/libc/startup/linux/riscv/start.cpp
@@ -6,21 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "config/linux/app.h"
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/threads/thread.h"
-#include "src/stdlib/atexit.h"
-#include "src/stdlib/exit.h"
 #include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
 
-#include <linux/auxvec.h>
-#include <linux/elf.h>
-#include <stdint.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" int main(int, char **, char **);
 
 namespace LIBC_NAMESPACE {
 
@@ -32,10 +24,6 @@ static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
 #error "mmap and mmap2 syscalls not available."
 #endif
 
-AppProperties app;
-
-static ThreadAttributes main_thread_attrib;
-
 void init_tls(TLSDescriptor &tls_descriptor) {
   if (app.tls.size == 0) {
     tls_descriptor.size = 0;
@@ -56,18 +44,18 @@ void init_tls(TLSDescriptor &tls_descriptor) {
   // We cannot call the mmap function here as the functions set errno on
   // failure. Since errno is implemented via a thread local variable, we cannot
   // use errno before TLS is setup.
-  long mmap_ret_val = LIBC_NAMESPACE::syscall_impl<long>(
-      MMAP_SYSCALL_NUMBER, nullptr, alloc_size, PROT_READ | PROT_WRITE,
-      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
+                                         alloc_size, PROT_READ | PROT_WRITE,
+                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
   // We cannot check the return value with MAP_FAILED as that is the return
   // of the mmap function and not the mmap syscall.
   if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
+    syscall_impl<long>(SYS_exit, 1);
   uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
   uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
-  LIBC_NAMESPACE::inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                                reinterpret_cast<const char *>(app.tls.address),
-                                app.tls.init_size);
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
   tls_descriptor.size = alloc_size;
   tls_descriptor.addr = thread_ptr;
   tls_descriptor.tp = tls_addr;
@@ -76,148 +64,22 @@ void init_tls(TLSDescriptor &tls_descriptor) {
 void cleanup_tls(uintptr_t addr, uintptr_t size) {
   if (size == 0)
     return;
-  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, addr, size);
+  syscall_impl<long>(SYS_munmap, addr, size);
 }
 
-static void set_thread_ptr(uintptr_t val) {
+bool set_thread_ptr(uintptr_t val) {
   LIBC_INLINE_ASM("mv tp, %0\n\t" : : "r"(val));
+  return true;
 }
 
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
-extern "C" {
-// These arrays are present in the .init_array and .fini_array sections.
-// The symbols are inserted by linker when it sees references to them.
-extern uintptr_t __preinit_array_start[];
-extern uintptr_t __preinit_array_end[];
-extern uintptr_t __init_array_start[];
-extern uintptr_t __init_array_end[];
-extern uintptr_t __fini_array_start[];
-extern uintptr_t __fini_array_end[];
-}
-
-static void call_init_array_callbacks(int argc, char **argv, char **env) {
-  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
-  for (size_t i = 0; i < preinit_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
-  size_t init_array_size = __init_array_end - __init_array_start;
-  for (size_t i = 0; i < init_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
-}
-
-static void call_fini_array_callbacks() {
-  size_t fini_array_size = __fini_array_end - __fini_array_start;
-  for (size_t i = fini_array_size; i > 0; --i)
-    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
-}
-
-} // namespace LIBC_NAMESPACE
-
-using LIBC_NAMESPACE::app;
-using LIBC_NAMESPACE::AuxEntry;
-
-#if defined(LIBC_TARGET_ARCH_IS_X86_64) ||                                     \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                    \
-    defined(LIBC_TARGET_ARCH_IS_RISCV64)
-typedef Elf64_Phdr PgrHdrTableType;
-#elif defined(LIBC_TARGET_ARCH_IS_RISCV32)
-typedef Elf32_Phdr PgrHdrTableType;
-#else
-#error "Program header table type is not defined for the target platform."
-#endif
-
-__attribute__((noinline)) static void do_start() {
+extern "C" [[noreturn]] void _start() {
   LIBC_INLINE_ASM(".option push\n\t"
                   ".option norelax\n\t"
                   "lla gp, __global_pointer$\n\t"
                   ".option pop\n\t");
-  auto tid = LIBC_NAMESPACE::syscall_impl<long>(SYS_gettid);
-  if (tid <= 0)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  LIBC_NAMESPACE::main_thread_attrib.tid = static_cast<int>(tid);
-
-  // After the argv array, is a 8-byte long NULL value before the array of env
-  // values. The end of the env values is marked by another 8-byte long NULL
-  // value. We step over it (the "+ 1" below) to get to the env values.
-  LIBC_NAMESPACE::ArgVEntryType *env_ptr = app.args->argv + app.args->argc + 1;
-  LIBC_NAMESPACE::ArgVEntryType *env_end_marker = env_ptr;
-  app.env_ptr = env_ptr;
-  while (*env_end_marker)
-    ++env_end_marker;
-
-  // Initialize the POSIX global declared in unistd.h
-  environ = reinterpret_cast<char **>(env_ptr);
-
-  // After the env array, is the aux-vector. The end of the aux-vector is
-  // denoted by an AT_NULL entry.
-  PgrHdrTableType *program_hdr_table = nullptr;
-  uintptr_t program_hdr_count;
-  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
-  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
-    switch (aux_entry->id) {
-    case AT_PHDR:
-      program_hdr_table = reinterpret_cast<PgrHdrTableType *>(aux_entry->value);
-      break;
-    case AT_PHNUM:
-      program_hdr_count = aux_entry->value;
-      break;
-    case AT_PAGESZ:
-      app.page_size = aux_entry->value;
-      break;
-    default:
-      break; // TODO: Read other useful entries from the aux vector.
-    }
-  }
-
-  app.tls.size = 0;
-  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
-    PgrHdrTableType *phdr = program_hdr_table + i;
-    if (phdr->p_type != PT_TLS)
-      continue;
-    // TODO: p_vaddr value has to be adjusted for static-pie executables.
-    app.tls.address = phdr->p_vaddr;
-    app.tls.size = phdr->p_memsz;
-    app.tls.init_size = phdr->p_filesz;
-    app.tls.align = phdr->p_align;
-  }
-
-  // This descriptor has to be static since its cleanup function cannot
-  // capture the context.
-  static LIBC_NAMESPACE::TLSDescriptor tls;
-  LIBC_NAMESPACE::init_tls(tls);
-  if (tls.size != 0)
-    LIBC_NAMESPACE::set_thread_ptr(tls.tp);
-
-  LIBC_NAMESPACE::self.attrib = &LIBC_NAMESPACE::main_thread_attrib;
-  LIBC_NAMESPACE::main_thread_attrib.atexit_callback_mgr =
-      LIBC_NAMESPACE::internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
-  LIBC_NAMESPACE::atexit(
-      []() { LIBC_NAMESPACE::cleanup_tls(tls.tp, tls.size); });
-  // We want the fini array callbacks to be run after other atexit
-  // callbacks are run. So, we register them before running the init
-  // array callbacks as they can potentially register their own atexit
-  // callbacks.
-  LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
-
-  LIBC_NAMESPACE::call_init_array_callbacks(
-      static_cast<int>(app.args->argc),
-      reinterpret_cast<char **>(app.args->argv),
-      reinterpret_cast<char **>(env_ptr));
-
-  int retval = main(static_cast<int>(app.args->argc),
-                    reinterpret_cast<char **>(app.args->argv),
-                    reinterpret_cast<char **>(env_ptr));
-
-  LIBC_NAMESPACE::exit(retval);
-}
-
-extern "C" void _start() {
   // Fetch the args using the frame pointer.
-  app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
+  app.args = reinterpret_cast<Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)));
   do_start();
 }
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/x86_64/CMakeLists.txt b/libc/startup/linux/x86_64/CMakeLists.txt
index aac5a0626a176a..71a1e531e36b1b 100644
--- a/libc/startup/linux/x86_64/CMakeLists.txt
+++ b/libc/startup/linux/x86_64/CMakeLists.txt
@@ -1,22 +1,16 @@
 add_startup_object(
-  crt1
+  start
   SRC
     start.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
     libc.include.sys_syscall
-    libc.include.unistd
-    libc.src.__support.threads.thread
     libc.src.__support.OSUtil.osutil
-    libc.src.stdlib.exit
-    libc.src.stdlib.abort
-    libc.src.stdlib.atexit
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.unistd.environ
   COMPILE_OPTIONS
     -fno-stack-protector
     -fno-omit-frame-pointer
-    -ffreestanding # To avoid compiler warnings about calling the main function.
+    -ffreestanding
     -fno-builtin
 )
diff --git a/libc/startup/linux/x86_64/start.cpp b/libc/startup/linux/x86_64/start.cpp
index bc03a3cb1de27f..6402c39f4b6c8e 100644
--- a/libc/startup/linux/x86_64/start.cpp
+++ b/libc/startup/linux/x86_64/start.cpp
@@ -6,24 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "config/linux/app.h"
-#include "src/__support/OSUtil/io.h"
 #include "src/__support/OSUtil/syscall.h"
-#include "src/__support/threads/thread.h"
-#include "src/stdlib/abort.h"
-#include "src/stdlib/atexit.h"
-#include "src/stdlib/exit.h"
 #include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
 
 #include <asm/prctl.h>
-#include <linux/auxvec.h>
-#include <linux/elf.h>
-#include <stdint.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
-#include <unistd.h>
-
-extern "C" int main(int, char **, char **);
 
 namespace LIBC_NAMESPACE {
 
@@ -35,12 +24,7 @@ static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
 #error "mmap and mmap2 syscalls not available."
 #endif
 
-AppProperties app;
-
-static ThreadAttributes main_thread_attrib;
-
-// TODO: The function is x86_64 specific. Move it to config/linux/app.h
-// and generalize it. Also, dynamic loading is not handled currently.
+// TODO: Also generalize this routine and handle dynamic loading properly.
 void init_tls(TLSDescriptor &tls_descriptor) {
   if (app.tls.size == 0) {
     tls_descriptor.size = 0;
@@ -63,13 +47,13 @@ void init_tls(TLSDescriptor &tls_descriptor) {
   // We cannot call the mmap function here as the functions set errno on
   // failure. Since errno is implemented via a thread local variable, we cannot
   // use errno before TLS is setup.
-  long mmap_retval = LIBC_NAMESPACE::syscall_impl<long>(
+  long mmap_retval = syscall_impl<long>(
       MMAP_SYSCALL_NUMBER, nullptr, tls_size_with_addr, PROT_READ | PROT_WRITE,
       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
   // We cannot check the return value with MAP_FAILED as that is the return
   // of the mmap function and not the mmap syscall.
   if (mmap_retval < 0 && static_cast<uintptr_t>(mmap_retval) > -app.page_size)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
+    syscall_impl<long>(SYS_exit, 1);
   uintptr_t *tls_addr = reinterpret_cast<uintptr_t *>(mmap_retval);
 
   // x86_64 TLS faces down from the thread pointer with the first entry
@@ -77,19 +61,19 @@ void init_tls(TLSDescriptor &tls_descriptor) {
   uintptr_t end_ptr = reinterpret_cast<uintptr_t>(tls_addr) + tls_size;
   *reinterpret_cast<uintptr_t *>(end_ptr) = end_ptr;
 
-  LIBC_NAMESPACE::inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                                reinterpret_cast<const char *>(app.tls.address),
-                                app.tls.init_size);
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
   uintptr_t *stack_guard_addr = reinterpret_cast<uintptr_t *>(end_ptr + 40);
   // Setting the stack guard to a random value.
   // We cannot call the get_random function here as the function sets errno on
   // failure. Since errno is implemented via a thread local variable, we cannot
   // use errno before TLS is setup.
-  ssize_t stack_guard_retval = LIBC_NAMESPACE::syscall_impl<ssize_t>(
-      SYS_getrandom, reinterpret_cast<long>(stack_guard_addr), sizeof(uint64_t),
-      0);
+  long stack_guard_retval =
+      syscall_impl(SYS_getrandom, reinterpret_cast<long>(stack_guard_addr),
+                   sizeof(uint64_t), 0);
   if (stack_guard_retval < 0)
-    LIBC_NAMESPACE::syscall_impl(SYS_exit, 1);
+    syscall_impl(SYS_exit, 1);
 
   tls_descriptor = {tls_size_with_addr, reinterpret_cast<uintptr_t>(tls_addr),
                     end_ptr};
@@ -99,53 +83,19 @@ void init_tls(TLSDescriptor &tls_descriptor) {
 void cleanup_tls(uintptr_t addr, uintptr_t size) {
   if (size == 0)
     return;
-  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, addr, size);
+  syscall_impl<long>(SYS_munmap, addr, size);
 }
 
 // Sets the thread pointer to |val|. Returns true on success, false on failure.
-static bool set_thread_ptr(uintptr_t val) {
-  return LIBC_NAMESPACE::syscall_impl(SYS_arch_prctl, ARCH_SET_FS, val) != -1;
-}
-
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
-extern "C" {
-// These arrays are present in the .init_array and .fini_array sections.
-// The symbols are inserted by linker when it sees references to them.
-extern uintptr_t __preinit_array_start[];
-extern uintptr_t __preinit_array_end[];
-extern uintptr_t __init_array_start[];
-extern uintptr_t __init_array_end[];
-extern uintptr_t __fini_array_start[];
-extern uintptr_t __fini_array_end[];
-}
-
-static void call_init_array_callbacks(int argc, char **argv, char **env) {
-  size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
-  for (size_t i = 0; i < preinit_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__preinit_array_start[i])(argc, argv, env);
-  size_t init_array_size = __init_array_end - __init_array_start;
-  for (size_t i = 0; i < init_array_size; ++i)
-    reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
-}
-
-static void call_fini_array_callbacks() {
-  size_t fini_array_size = __fini_array_end - __fini_array_start;
-  for (size_t i = fini_array_size; i > 0; --i)
-    reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
+bool set_thread_ptr(uintptr_t val) {
+  return syscall_impl(SYS_arch_prctl, ARCH_SET_FS, val) != -1;
 }
 
-} // namespace LIBC_NAMESPACE
-
-using LIBC_NAMESPACE::app;
-using LIBC_NAMESPACE::AuxEntry;
-
-extern "C" void _start() {
+extern "C" [[noreturn]] void _start() {
   // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous value
   // of the base pointer is pushed on to the stack. So, we step over it (the
   // "+ 1" below) to get to the args.
-  app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
+  app.args = reinterpret_cast<Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 1);
 
   // The x86_64 ABI requires that the stack pointer is aligned to a 16-byte
@@ -162,85 +112,6 @@ extern "C" void _start() {
   __asm__ __volatile__("andq $0xfffffffffffffff0, %rsp\n\t");
   __asm__ __volatile__("andq $0xfffffffffffffff0, %rbp\n\t");
 
-  auto tid = LIBC_NAMESPACE::syscall_impl<long>(SYS_gettid);
-  if (tid <= 0)
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-  LIBC_NAMESPACE::main_thread_attrib.tid = static_cast<int>(tid);
-
-  // After the argv array, is a 8-byte long NULL value before the array of env
-  // values. The end of the env values is marked by another 8-byte long NULL
-  // value. We step over it (the "+ 1" below) to get to the env values.
-  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
-  uint64_t *env_end_marker = env_ptr;
-  app.env_ptr = env_ptr;
-  while (*env_end_marker)
-    ++env_end_marker;
-
-  // Initialize the POSIX global declared in unistd.h
-  environ = reinterpret_cast<char **>(env_ptr);
-
-  // After the env array, is the aux-vector. The end of the aux-vector is
-  // denoted by an AT_NULL entry.
-  Elf64_Phdr *program_hdr_table = nullptr;
-  uintptr_t program_hdr_count = 0;
-  app.auxv_ptr = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
-  for (auto *aux_entry = app.auxv_ptr; aux_entry->id != AT_NULL; ++aux_entry) {
-    switch (aux_entry->id) {
-    case AT_PHDR:
-      program_hdr_table = reinterpret_cast<Elf64_Phdr *>(aux_entry->value);
-      break;
-    case AT_PHNUM:
-      program_hdr_count = aux_entry->value;
-      break;
-    case AT_PAGESZ:
-      app.page_size = aux_entry->value;
-      break;
-    default:
-      break; // TODO: Read other useful entries from the aux vector.
-    }
-  }
-
-  app.tls.size = 0;
-  for (uintptr_t i = 0; i < program_hdr_count; ++i) {
-    Elf64_Phdr *phdr = program_hdr_table + i;
-    if (phdr->p_type != PT_TLS)
-      continue;
-    // TODO: p_vaddr value has to be adjusted for static-pie executables.
-    app.tls.address = phdr->p_vaddr;
-    app.tls.size = phdr->p_memsz;
-    app.tls.init_size = phdr->p_filesz;
-    app.tls.align = phdr->p_align;
-  }
-
-  // This descriptor has to be static since its cleanup function cannot
-  // capture the context.
-  static LIBC_NAMESPACE::TLSDescriptor tls;
-  LIBC_NAMESPACE::init_tls(tls);
-  if (tls.size != 0 && !LIBC_NAMESPACE::set_thread_ptr(tls.tp))
-    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 1);
-
-  LIBC_NAMESPACE::self.attrib = &LIBC_NAMESPACE::main_thread_attrib;
-  LIBC_NAMESPACE::main_thread_attrib.atexit_callback_mgr =
-      LIBC_NAMESPACE::internal::get_thread_atexit_callback_mgr();
-  // We register the cleanup_tls function to be the last atexit callback to be
-  // invoked. It will tear down the TLS. Other callbacks may depend on TLS (such
-  // as the stack protector canary).
-  LIBC_NAMESPACE::atexit(
-      []() { LIBC_NAMESPACE::cleanup_tls(tls.tp, tls.size); });
-  // We want the fini array callbacks to be run after other atexit
-  // callbacks are run. So, we register them before running the init
-  // array callbacks as they can potentially register their own atexit
-  // callbacks.
-  LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
-
-  LIBC_NAMESPACE::call_init_array_callbacks(
-      static_cast<int>(app.args->argc),
-      reinterpret_cast<char **>(app.args->argv),
-      reinterpret_cast<char **>(env_ptr));
-
-  int retval = main(static_cast<int>(app.args->argc),
-                    reinterpret_cast<char **>(app.args->argv),
-                    reinterpret_cast<char **>(env_ptr));
-
-  LIBC_NAMESPACE::exit(retval);
+  do_start();
 }
+} // namespace LIBC_NAMESPACE

>From 5689214719d2563b50a78a1f6e12f37e3942fd95 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Wed, 20 Dec 2023 14:43:31 -0500
Subject: [PATCH 2/5] use inline asm

---
 libc/startup/linux/x86_64/start.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libc/startup/linux/x86_64/start.cpp b/libc/startup/linux/x86_64/start.cpp
index 6402c39f4b6c8e..6778b67fca4f1c 100644
--- a/libc/startup/linux/x86_64/start.cpp
+++ b/libc/startup/linux/x86_64/start.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/__support/OSUtil/syscall.h"
+#include "src/__support/macros/attributes.h"
 #include "src/string/memory_utils/inline_memcpy.h"
 #include "startup/linux/do_start.h"
 
@@ -109,8 +110,8 @@ extern "C" [[noreturn]] void _start() {
   // compilers can generate code assuming the alignment as required by the ABI.
   // If the stack pointers as setup by the OS are already aligned, then the
   // following code is a NOP.
-  __asm__ __volatile__("andq $0xfffffffffffffff0, %rsp\n\t");
-  __asm__ __volatile__("andq $0xfffffffffffffff0, %rbp\n\t");
+  LIBC_INLINE_ASM("andq $0xfffffffffffffff0, %rsp\n\t");
+  LIBC_INLINE_ASM("andq $0xfffffffffffffff0, %rbp\n\t");
 
   do_start();
 }

>From 21bea980ab5dd516bd4f7961d6254105ebdd16df Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Thu, 21 Dec 2023 11:01:09 -0500
Subject: [PATCH 3/5] split more and address CR

---
 libc/startup/linux/CMakeLists.txt         |  1 +
 libc/startup/linux/aarch64/CMakeLists.txt | 15 +++-
 libc/startup/linux/aarch64/start.cpp      | 82 +-------------------
 libc/startup/linux/aarch64/tls.cpp        | 86 +++++++++++++++++++++
 libc/startup/linux/do_start.cpp           | 19 +++--
 libc/startup/linux/riscv/CMakeLists.txt   | 16 +++-
 libc/startup/linux/riscv/start.cpp        | 70 +----------------
 libc/startup/linux/riscv/tls.cpp          | 74 ++++++++++++++++++
 libc/startup/linux/x86_64/CMakeLists.txt  | 18 ++++-
 libc/startup/linux/x86_64/start.cpp       | 93 ++---------------------
 libc/startup/linux/x86_64/tls.cpp         | 93 +++++++++++++++++++++++
 11 files changed, 316 insertions(+), 251 deletions(-)
 create mode 100644 libc/startup/linux/aarch64/tls.cpp
 create mode 100644 libc/startup/linux/riscv/tls.cpp
 create mode 100644 libc/startup/linux/x86_64/tls.cpp

diff --git a/libc/startup/linux/CMakeLists.txt b/libc/startup/linux/CMakeLists.txt
index e9bdc2dd4478e9..39bcca9cdba9fe 100644
--- a/libc/startup/linux/CMakeLists.txt
+++ b/libc/startup/linux/CMakeLists.txt
@@ -109,6 +109,7 @@ add_object_library(
 merge_relocatable_object(
   crt1
   .${LIBC_TARGET_ARCHITECTURE}.start
+  .${LIBC_TARGET_ARCHITECTURE}.tls
   .do_start
 )
 
diff --git a/libc/startup/linux/aarch64/CMakeLists.txt b/libc/startup/linux/aarch64/CMakeLists.txt
index 868b755d38f519..5ea6ae59abcb28 100644
--- a/libc/startup/linux/aarch64/CMakeLists.txt
+++ b/libc/startup/linux/aarch64/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_startup_object(
-  start
+  tls
   SRC
-    start.cpp
+    tls.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
@@ -12,3 +12,14 @@ add_startup_object(
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.
 )
+
+add_startup_object(
+  start
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer
+    -ffreestanding # To avoid compiler warnings about calling the main function.
+)
diff --git a/libc/startup/linux/aarch64/start.cpp b/libc/startup/linux/aarch64/start.cpp
index 0e1efdc1b04da9..0eabb726695a85 100644
--- a/libc/startup/linux/aarch64/start.cpp
+++ b/libc/startup/linux/aarch64/start.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of crt for aarch64 ---------------------------------===//
+//===-- Implementation of _start for aarch64 ------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,85 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "src/__support/OSUtil/syscall.h"
-#include "src/__support/threads/thread.h"
-#include "src/string/memory_utils/inline_memcpy.h"
 #include "startup/linux/do_start.h"
-
-#include <arm_acle.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-
-// Source documentation:
-// https://github.com/ARM-software/abi-aa/tree/main/sysvabi64
-
-namespace LIBC_NAMESPACE {
-
-#ifdef SYS_mmap2
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
-#elif SYS_mmap
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
-#else
-#error "mmap and mmap2 syscalls not available."
-#endif
-
-void init_tls(TLSDescriptor &tls_descriptor) {
-  if (app.tls.size == 0) {
-    tls_descriptor.size = 0;
-    tls_descriptor.tp = 0;
-    return;
-  }
-
-  // aarch64 follows the variant 1 TLS layout:
-  //
-  // 1. First entry is the dynamic thread vector pointer
-  // 2. Second entry is a 8-byte reserved word.
-  // 3. Padding for alignment.
-  // 4. The TLS data from the ELF image.
-  //
-  // The thread pointer points to the first entry.
-
-  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
-  uintptr_t padding = 0;
-  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
-  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
-  if (diff != 0)
-    padding += (ALIGNMENT_MASK - diff) + 1;
-
-  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
-
-  // We cannot call the mmap function here as the functions set errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
-                                         alloc_size, PROT_READ | PROT_WRITE,
-                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  // We cannot check the return value with MAP_FAILED as that is the return
-  // of the mmap function and not the mmap syscall.
-  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
-    syscall_impl<long>(SYS_exit, 1);
-  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
-  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
-  inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                reinterpret_cast<const char *>(app.tls.address),
-                app.tls.init_size);
-  tls_descriptor.size = alloc_size;
-  tls_descriptor.addr = thread_ptr;
-  tls_descriptor.tp = thread_ptr;
-}
-
-void cleanup_tls(uintptr_t addr, uintptr_t size) {
-  if (size == 0)
-    return;
-  syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-bool set_thread_ptr(uintptr_t val) {
-  __arm_wsr64("tpidr_el0", val);
-  return true;
-}
-
 extern "C" void _start() {
+  using namespace LIBC_NAMESPACE;
   // Skip the Frame Pointer and the Link Register
   // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
   // Section 6.2.3. Note that this only works if the current function
@@ -99,5 +23,3 @@ extern "C" void _start() {
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 2);
   do_start();
 }
-
-} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/aarch64/tls.cpp b/libc/startup/linux/aarch64/tls.cpp
new file mode 100644
index 00000000000000..50dcf888de5cd3
--- /dev/null
+++ b/libc/startup/linux/aarch64/tls.cpp
@@ -0,0 +1,86 @@
+//===-- Implementation of tls for aarch64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
+
+#include <arm_acle.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+// Source documentation:
+// https://github.com/ARM-software/abi-aa/tree/main/sysvabi64
+
+namespace LIBC_NAMESPACE {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif SYS_mmap
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "mmap and mmap2 syscalls not available."
+#endif
+
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // aarch64 follows the variant 1 TLS layout:
+  //
+  // 1. First entry is the dynamic thread vector pointer
+  // 2. Second entry is an 8-byte reserved word.
+  // 3. Padding for alignment.
+  // 4. The TLS data from the ELF image.
+  //
+  // The thread pointer points to the first entry.
+
+  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
+  uintptr_t padding = 0;
+  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
+  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
+  if (diff != 0)
+    padding += (ALIGNMENT_MASK - diff) + 1;
+
+  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
+
+  // We cannot call the mmap function here as the functions set errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
+                                         alloc_size, PROT_READ | PROT_WRITE,
+                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
+    syscall_impl<long>(SYS_exit, 1);
+  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
+  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
+  tls_descriptor.size = alloc_size;
+  tls_descriptor.addr = thread_ptr;
+  tls_descriptor.tp = thread_ptr;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  syscall_impl<long>(SYS_munmap, addr, size);
+}
+
+bool set_thread_ptr(uintptr_t val) {
+  __arm_wsr64("tpidr_el0", val);
+  return true;
+}
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
index 4a86bc385a7c8b..05dbd4488f5882 100644
--- a/libc/startup/linux/do_start.cpp
+++ b/libc/startup/linux/do_start.cpp
@@ -18,14 +18,6 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 
-namespace LIBC_NAMESPACE {
-
-// TODO: this symbol will be moved to config.linux.app
-AppProperties app;
-
-using InitCallback = void(int, char **, char **);
-using FiniCallback = void(void);
-
 extern "C" int main(int argc, char **argv, char **envp);
 
 extern "C" {
@@ -39,6 +31,13 @@ extern uintptr_t __fini_array_start[];
 extern uintptr_t __fini_array_end[];
 }
 
+namespace LIBC_NAMESPACE {
+// TODO: this symbol will be moved to config.linux.app
+AppProperties app;
+
+using InitCallback = void(int, char **, char **);
+using FiniCallback = void(void);
+
 static void call_init_array_callbacks(int argc, char **argv, char **env) {
   size_t preinit_array_size = __preinit_array_end - __preinit_array_start;
   for (size_t i = 0; i < preinit_array_size; ++i)
@@ -65,8 +64,8 @@ static ThreadAttributes main_thread_attrib;
   // After the argv array, is a 8-byte long NULL value before the array of env
   // values. The end of the env values is marked by another 8-byte long NULL
   // value. We step over it (the "+ 1" below) to get to the env values.
-  uint64_t *env_ptr = app.args->argv + app.args->argc + 1;
-  uint64_t *env_end_marker = env_ptr;
+  ArgVEntryType *env_ptr = app.args->argv + app.args->argc + 1;
+  ArgVEntryType *env_end_marker = env_ptr;
   app.env_ptr = env_ptr;
   while (*env_end_marker)
     ++env_end_marker;
diff --git a/libc/startup/linux/riscv/CMakeLists.txt b/libc/startup/linux/riscv/CMakeLists.txt
index 868b755d38f519..3717784233c151 100644
--- a/libc/startup/linux/riscv/CMakeLists.txt
+++ b/libc/startup/linux/riscv/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_startup_object(
-  start
+  tls
   SRC
-    start.cpp
+    tls.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
@@ -12,3 +12,15 @@ add_startup_object(
     -fno-omit-frame-pointer
     -ffreestanding # To avoid compiler warnings about calling the main function.
 )
+
+add_startup_object(
+  start
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
+    libc.src.__support.macros.attributes
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer
+    -ffreestanding # To avoid compiler warnings about calling the main function.
+)
diff --git a/libc/startup/linux/riscv/start.cpp b/libc/startup/linux/riscv/start.cpp
index 57c83b502e9c94..989de4142e8932 100644
--- a/libc/startup/linux/riscv/start.cpp
+++ b/libc/startup/linux/riscv/start.cpp
@@ -1,78 +1,15 @@
-//===-- Implementation of crt for riscv64 ---------------------------------===//
+//===-- Implementation of _start for riscv --------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
-#include "src/__support/OSUtil/syscall.h"
-#include "src/__support/threads/thread.h"
-#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/__support/macros/attributes.h"
 #include "startup/linux/do_start.h"
 
-#include <sys/mman.h>
-#include <sys/syscall.h>
-
-namespace LIBC_NAMESPACE {
-
-#ifdef SYS_mmap2
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
-#elif SYS_mmap
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
-#else
-#error "mmap and mmap2 syscalls not available."
-#endif
-
-void init_tls(TLSDescriptor &tls_descriptor) {
-  if (app.tls.size == 0) {
-    tls_descriptor.size = 0;
-    tls_descriptor.tp = 0;
-    return;
-  }
-
-  // riscv64 follows the variant 1 TLS layout:
-  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
-  uintptr_t padding = 0;
-  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
-  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
-  if (diff != 0)
-    padding += (ALIGNMENT_MASK - diff) + 1;
-
-  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
-
-  // We cannot call the mmap function here as the functions set errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
-                                         alloc_size, PROT_READ | PROT_WRITE,
-                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  // We cannot check the return value with MAP_FAILED as that is the return
-  // of the mmap function and not the mmap syscall.
-  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
-    syscall_impl<long>(SYS_exit, 1);
-  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
-  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
-  inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                reinterpret_cast<const char *>(app.tls.address),
-                app.tls.init_size);
-  tls_descriptor.size = alloc_size;
-  tls_descriptor.addr = thread_ptr;
-  tls_descriptor.tp = tls_addr;
-}
-
-void cleanup_tls(uintptr_t addr, uintptr_t size) {
-  if (size == 0)
-    return;
-  syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-bool set_thread_ptr(uintptr_t val) {
-  LIBC_INLINE_ASM("mv tp, %0\n\t" : : "r"(val));
-  return true;
-}
-
 extern "C" [[noreturn]] void _start() {
+  using namespace LIBC_NAMESPACE;
   LIBC_INLINE_ASM(".option push\n\t"
                   ".option norelax\n\t"
                   "lla gp, __global_pointer$\n\t"
@@ -82,4 +19,3 @@ extern "C" [[noreturn]] void _start() {
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)));
   do_start();
 }
-} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/riscv/tls.cpp b/libc/startup/linux/riscv/tls.cpp
new file mode 100644
index 00000000000000..6cb0c306562f7a
--- /dev/null
+++ b/libc/startup/linux/riscv/tls.cpp
@@ -0,0 +1,74 @@
+//===-- Implementation of tls for riscv64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/syscall.h"
+#include "src/__support/threads/thread.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
+
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif SYS_mmap
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "mmap and mmap2 syscalls not available."
+#endif
+
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // riscv64 follows the variant 1 TLS layout:
+  const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
+  uintptr_t padding = 0;
+  const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
+  uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
+  if (diff != 0)
+    padding += (ALIGNMENT_MASK - diff) + 1;
+
+  uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
+
+  // We cannot call the mmap function here as the functions set errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
+                                         alloc_size, PROT_READ | PROT_WRITE,
+                                         MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
+    syscall_impl<long>(SYS_exit, 1);
+  uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
+  uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
+  tls_descriptor.size = alloc_size;
+  tls_descriptor.addr = thread_ptr;
+  tls_descriptor.tp = tls_addr;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  syscall_impl<long>(SYS_munmap, addr, size);
+}
+
+bool set_thread_ptr(uintptr_t val) {
+  LIBC_INLINE_ASM("mv tp, %0\n\t" : : "r"(val));
+  return true;
+}
+} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/x86_64/CMakeLists.txt b/libc/startup/linux/x86_64/CMakeLists.txt
index 71a1e531e36b1b..30da7ab4e1ec3d 100644
--- a/libc/startup/linux/x86_64/CMakeLists.txt
+++ b/libc/startup/linux/x86_64/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_startup_object(
-  start
+  tls
   SRC
-    start.cpp
+    tls.cpp
   DEPENDS
     libc.config.linux.app_h
     libc.include.sys_mman
@@ -14,3 +14,17 @@ add_startup_object(
     -ffreestanding
     -fno-builtin
 )
+
+add_startup_object(
+  start
+  SRC
+    start.cpp
+  DEPENDS
+    libc.config.linux.app_h
+    libc.src.__support.macros.attributes
+  COMPILE_OPTIONS
+    -fno-stack-protector
+    -fno-omit-frame-pointer
+    -ffreestanding
+    -fno-builtin
+)
diff --git a/libc/startup/linux/x86_64/start.cpp b/libc/startup/linux/x86_64/start.cpp
index 6778b67fca4f1c..fbf6d65baccaa9 100644
--- a/libc/startup/linux/x86_64/start.cpp
+++ b/libc/startup/linux/x86_64/start.cpp
@@ -1,100 +1,18 @@
-//===-- Implementation of crt for x86_64 ----------------------------------===//
+//===-- Implementation of _start for x86_64 -------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
-#include "src/__support/OSUtil/syscall.h"
 #include "src/__support/macros/attributes.h"
-#include "src/string/memory_utils/inline_memcpy.h"
 #include "startup/linux/do_start.h"
 
-#include <asm/prctl.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-
-namespace LIBC_NAMESPACE {
-
-#ifdef SYS_mmap2
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
-#elif SYS_mmap
-static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
-#else
-#error "mmap and mmap2 syscalls not available."
-#endif
-
-// TODO: Also generalize this routine and handle dynamic loading properly.
-void init_tls(TLSDescriptor &tls_descriptor) {
-  if (app.tls.size == 0) {
-    tls_descriptor.size = 0;
-    tls_descriptor.tp = 0;
-    return;
-  }
-
-  // We will assume the alignment is always a power of two.
-  uintptr_t tls_size = app.tls.size & -app.tls.align;
-  if (tls_size != app.tls.size)
-    tls_size += app.tls.align;
-
-  // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the
-  // address of the TLS block. So, we add more size to accomodate this address
-  // entry.
-  // We also need to include space for the stack canary. The canary is at
-  // offset 0x28 (40) and is of size uintptr_t.
-  uintptr_t tls_size_with_addr = tls_size + sizeof(uintptr_t) + 40;
-
-  // We cannot call the mmap function here as the functions set errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long mmap_retval = syscall_impl<long>(
-      MMAP_SYSCALL_NUMBER, nullptr, tls_size_with_addr, PROT_READ | PROT_WRITE,
-      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-  // We cannot check the return value with MAP_FAILED as that is the return
-  // of the mmap function and not the mmap syscall.
-  if (mmap_retval < 0 && static_cast<uintptr_t>(mmap_retval) > -app.page_size)
-    syscall_impl<long>(SYS_exit, 1);
-  uintptr_t *tls_addr = reinterpret_cast<uintptr_t *>(mmap_retval);
-
-  // x86_64 TLS faces down from the thread pointer with the first entry
-  // pointing to the address of the first real TLS byte.
-  uintptr_t end_ptr = reinterpret_cast<uintptr_t>(tls_addr) + tls_size;
-  *reinterpret_cast<uintptr_t *>(end_ptr) = end_ptr;
-
-  inline_memcpy(reinterpret_cast<char *>(tls_addr),
-                reinterpret_cast<const char *>(app.tls.address),
-                app.tls.init_size);
-  uintptr_t *stack_guard_addr = reinterpret_cast<uintptr_t *>(end_ptr + 40);
-  // Setting the stack guard to a random value.
-  // We cannot call the get_random function here as the function sets errno on
-  // failure. Since errno is implemented via a thread local variable, we cannot
-  // use errno before TLS is setup.
-  long stack_guard_retval =
-      syscall_impl(SYS_getrandom, reinterpret_cast<long>(stack_guard_addr),
-                   sizeof(uint64_t), 0);
-  if (stack_guard_retval < 0)
-    syscall_impl(SYS_exit, 1);
-
-  tls_descriptor = {tls_size_with_addr, reinterpret_cast<uintptr_t>(tls_addr),
-                    end_ptr};
-  return;
-}
-
-void cleanup_tls(uintptr_t addr, uintptr_t size) {
-  if (size == 0)
-    return;
-  syscall_impl<long>(SYS_munmap, addr, size);
-}
-
-// Sets the thread pointer to |val|. Returns true on success, false on failure.
-bool set_thread_ptr(uintptr_t val) {
-  return syscall_impl(SYS_arch_prctl, ARCH_SET_FS, val) != -1;
-}
-
 extern "C" [[noreturn]] void _start() {
-  // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous value
-  // of the base pointer is pushed on to the stack. So, we step over it (the
+  using namespace LIBC_NAMESPACE;
+  // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous
+  // value of the base pointer is pushed on to the stack. So, we step over
+  // it (the
   // "+ 1" below) to get to the args.
   app.args = reinterpret_cast<Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 1);
@@ -115,4 +33,3 @@ extern "C" [[noreturn]] void _start() {
 
   do_start();
 }
-} // namespace LIBC_NAMESPACE
diff --git a/libc/startup/linux/x86_64/tls.cpp b/libc/startup/linux/x86_64/tls.cpp
new file mode 100644
index 00000000000000..8b0fa987362444
--- /dev/null
+++ b/libc/startup/linux/x86_64/tls.cpp
@@ -0,0 +1,93 @@
+//===-- Implementation of tls for x86_64 ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/OSUtil/syscall.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "startup/linux/do_start.h"
+
+#include <asm/prctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+namespace LIBC_NAMESPACE {
+
+#ifdef SYS_mmap2
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
+#elif SYS_mmap
+static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
+#else
+#error "mmap and mmap2 syscalls not available."
+#endif
+
+// TODO: Also generalize this routine and handle dynamic loading properly.
+void init_tls(TLSDescriptor &tls_descriptor) {
+  if (app.tls.size == 0) {
+    tls_descriptor.size = 0;
+    tls_descriptor.tp = 0;
+    return;
+  }
+
+  // We will assume the alignment is always a power of two.
+  uintptr_t tls_size = app.tls.size & -app.tls.align;
+  if (tls_size != app.tls.size)
+    tls_size += app.tls.align;
+
+  // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the
+  // address of the TLS block. So, we add more size to accommodate this address
+  // entry.
+  // We also need to include space for the stack canary. The canary is at
+  // offset 0x28 (40) and is of size uintptr_t.
+  uintptr_t tls_size_with_addr = tls_size + sizeof(uintptr_t) + 40;
+
+  // We cannot call the mmap function here as the function sets errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long mmap_retval = syscall_impl<long>(
+      MMAP_SYSCALL_NUMBER, nullptr, tls_size_with_addr, PROT_READ | PROT_WRITE,
+      MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  // We cannot check the return value with MAP_FAILED as that is the return
+  // of the mmap function and not the mmap syscall.
+  if (mmap_retval < 0 && static_cast<uintptr_t>(mmap_retval) > -app.page_size)
+    syscall_impl<long>(SYS_exit, 1);
+  uintptr_t *tls_addr = reinterpret_cast<uintptr_t *>(mmap_retval);
+
+  // x86_64 TLS faces down from the thread pointer with the first entry
+  // pointing to the address of the first real TLS byte.
+  uintptr_t end_ptr = reinterpret_cast<uintptr_t>(tls_addr) + tls_size;
+  *reinterpret_cast<uintptr_t *>(end_ptr) = end_ptr;
+
+  inline_memcpy(reinterpret_cast<char *>(tls_addr),
+                reinterpret_cast<const char *>(app.tls.address),
+                app.tls.init_size);
+  uintptr_t *stack_guard_addr = reinterpret_cast<uintptr_t *>(end_ptr + 40);
+  // Setting the stack guard to a random value.
+  // We cannot call the get_random function here as the function sets errno on
+  // failure. Since errno is implemented via a thread local variable, we cannot
+  // use errno before TLS is setup.
+  long stack_guard_retval =
+      syscall_impl(SYS_getrandom, reinterpret_cast<long>(stack_guard_addr),
+                   sizeof(uint64_t), 0);
+  if (stack_guard_retval < 0)
+    syscall_impl(SYS_exit, 1);
+
+  tls_descriptor = {tls_size_with_addr, reinterpret_cast<uintptr_t>(tls_addr),
+                    end_ptr};
+  return;
+}
+
+void cleanup_tls(uintptr_t addr, uintptr_t size) {
+  if (size == 0)
+    return;
+  syscall_impl<long>(SYS_munmap, addr, size);
+}
+
+// Sets the thread pointer to |val|. Returns true on success, false on failure.
+bool set_thread_ptr(uintptr_t val) {
+  return syscall_impl(SYS_arch_prctl, ARCH_SET_FS, val) != -1;
+}
+} // namespace LIBC_NAMESPACE

>From 38d72c68f0d59888a4ff0a6b1d9b80b87fac191c Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 22 Dec 2023 10:37:48 -0500
Subject: [PATCH 4/5] address reviews

---
 libc/startup/linux/aarch64/start.cpp |  5 ++---
 libc/startup/linux/aarch64/tls.cpp   |  2 +-
 libc/startup/linux/riscv/start.cpp   | 13 ++++++-------
 libc/startup/linux/riscv/tls.cpp     |  2 +-
 libc/startup/linux/x86_64/start.cpp  | 12 +++++-------
 5 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/libc/startup/linux/aarch64/start.cpp b/libc/startup/linux/aarch64/start.cpp
index 0eabb726695a85..b5588fa2f24f0e 100644
--- a/libc/startup/linux/aarch64/start.cpp
+++ b/libc/startup/linux/aarch64/start.cpp
@@ -8,7 +8,6 @@
 
 #include "startup/linux/do_start.h"
 extern "C" void _start() {
-  using namespace LIBC_NAMESPACE;
   // Skip the Frame Pointer and the Link Register
   // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
   // Section 6.2.3. Note that this only works if the current function
@@ -19,7 +18,7 @@ extern "C" void _start() {
   // will take us to the previous stack pointer. That is the reason why the
   // actual business logic of the startup code is pushed into a non-inline
   // function do_start so that this function is free of any stack usage.
-  app.args = reinterpret_cast<Args *>(
+  LIBC_NAMESPACE::app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 2);
-  do_start();
+  LIBC_NAMESPACE::do_start();
 }
diff --git a/libc/startup/linux/aarch64/tls.cpp b/libc/startup/linux/aarch64/tls.cpp
index 50dcf888de5cd3..f2579e821b1bf2 100644
--- a/libc/startup/linux/aarch64/tls.cpp
+++ b/libc/startup/linux/aarch64/tls.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of crt for aarch64 ---------------------------------===//
+//===-- Implementation of tls for aarch64 ---------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libc/startup/linux/riscv/start.cpp b/libc/startup/linux/riscv/start.cpp
index 989de4142e8932..389f71a66d30ac 100644
--- a/libc/startup/linux/riscv/start.cpp
+++ b/libc/startup/linux/riscv/start.cpp
@@ -9,13 +9,12 @@
 #include "startup/linux/do_start.h"
 
 extern "C" [[noreturn]] void _start() {
-  using namespace LIBC_NAMESPACE;
-  LIBC_INLINE_ASM(".option push\n\t"
-                  ".option norelax\n\t"
-                  "lla gp, __global_pointer$\n\t"
-                  ".option pop\n\t");
+  asm volatile(".option push\n\t"
+               ".option norelax\n\t"
+               "lla gp, __global_pointer$\n\t"
+               ".option pop\n\t");
   // Fetch the args using the frame pointer.
-  app.args = reinterpret_cast<Args *>(
+  LIBC_NAMESPACE::app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)));
-  do_start();
+  LIBC_NAMESPACE::do_start();
 }
diff --git a/libc/startup/linux/riscv/tls.cpp b/libc/startup/linux/riscv/tls.cpp
index 6cb0c306562f7a..997912c77e7377 100644
--- a/libc/startup/linux/riscv/tls.cpp
+++ b/libc/startup/linux/riscv/tls.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of tls for riscv64 ---------------------------------===//
+//===-- Implementation of tls for riscv -----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libc/startup/linux/x86_64/start.cpp b/libc/startup/linux/x86_64/start.cpp
index fbf6d65baccaa9..25da25a496daa5 100644
--- a/libc/startup/linux/x86_64/start.cpp
+++ b/libc/startup/linux/x86_64/start.cpp
@@ -9,12 +9,10 @@
 #include "startup/linux/do_start.h"
 
 extern "C" [[noreturn]] void _start() {
-  using namespace LIBC_NAMESPACE;
   // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous
   // value of the base pointer is pushed on to the stack. So, we step over
-  // it (the
-  // "+ 1" below) to get to the args.
-  app.args = reinterpret_cast<Args *>(
+  // it (the "+ 1" below) to get to the args.
+  LIBC_NAMESPACE::app.args = reinterpret_cast<LIBC_NAMESPACE::Args *>(
       reinterpret_cast<uintptr_t *>(__builtin_frame_address(0)) + 1);
 
   // The x86_64 ABI requires that the stack pointer is aligned to a 16-byte
@@ -28,8 +26,8 @@ extern "C" [[noreturn]] void _start() {
   // compilers can generate code assuming the alignment as required by the ABI.
   // If the stack pointers as setup by the OS are already aligned, then the
   // following code is a NOP.
-  LIBC_INLINE_ASM("andq $0xfffffffffffffff0, %rsp\n\t");
-  LIBC_INLINE_ASM("andq $0xfffffffffffffff0, %rbp\n\t");
+  asm volatile("andq $0xfffffffffffffff0, %rsp\n\t");
+  asm volatile("andq $0xfffffffffffffff0, %rbp\n\t");
 
-  do_start();
+  LIBC_NAMESPACE::do_start();
 }

>From c79d36d2f70bb0f5e15e5068a6c13631147a7847 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 22 Dec 2023 10:38:42 -0500
Subject: [PATCH 5/5] mark [[noreturn]]

---
 libc/startup/linux/aarch64/start.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/startup/linux/aarch64/start.cpp b/libc/startup/linux/aarch64/start.cpp
index b5588fa2f24f0e..d0a85268733903 100644
--- a/libc/startup/linux/aarch64/start.cpp
+++ b/libc/startup/linux/aarch64/start.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "startup/linux/do_start.h"
-extern "C" void _start() {
+extern "C" [[noreturn]] void _start() {
   // Skip the Frame Pointer and the Link Register
   // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
   // Section 6.2.3. Note that this only works if the current function



More information about the libc-commits mailing list