[libc-commits] [libc] [LIBC][ARM] Enable MMU setup and alignment fault handling during startup. (PR #204803)

Simi Pallipurath via libc-commits libc-commits at lists.llvm.org
Thu Jun 25 02:54:36 PDT 2026


https://github.com/simpal01 updated https://github.com/llvm/llvm-project/pull/204803

>From 8842b43284cbe6263f474c7f567167c07e308a26 Mon Sep 17 00:00:00 2001
From: Simi Pallipurath <simi.pallipurath at arm.com>
Date: Thu, 18 Jun 2026 14:00:33 +0100
Subject: [PATCH 1/3] [LIBC][ARM] Enable MMU setup and alignment fault handling
 during startup.

This change adds MMU initialization for Arm targets
and configures alignment fault behavior.

This patch:
1. Add MMU setup for AArch64 startup.
  - Create a minimal flat-mapped translation table.
  - Configure TTBR0_EL1, MAIR_EL1, and TCR_EL1.
  - Mark stack/heap memory as execute-never (XN) when possible.
  - Enable the MMU and configure alignment checking through SCTLR_EL1.
2. Add MMU setup for AArch32 A-profile (Armv7-A and later) startup:
  - Create a flat 1 MB section mapping covering the address space.
  - Configure DACR, TTBCR, and TTBR0.
  - Enable MMU, instruction cache, and data cache.
  - Configure alignment fault handling through SCTLR.
3. Enable unaligned access trapping on Arm M-profile targets when
  `__ARM_FEATURE_UNALIGNED` is not available by setting `CCR.UNALIGN_TRP`.

This allows libc startup code to provide consistent alignment
checking behavior across Arm architectures while enabling the MMU and
basic memory protection features on A-profile systems.

Assisted-by: codex; reviewed, cross-checked, and tested with
ATfE (Arm Toolchain for Embedded) by me.
---
 libc/startup/baremetal/aarch64/start.cpp | 59 ++++++++++++++++++++
 libc/startup/baremetal/arm/start.cpp     | 68 ++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/libc/startup/baremetal/aarch64/start.cpp b/libc/startup/baremetal/aarch64/start.cpp
index ff3ea933c5240..fbd565114b6af 100644
--- a/libc/startup/baremetal/aarch64/start.cpp
+++ b/libc/startup/baremetal/aarch64/start.cpp
@@ -32,9 +32,66 @@ extern uintptr_t __data_start[];
 extern uintptr_t __data_size[];
 extern uintptr_t __bss_start[];
 extern uintptr_t __bss_size[];
+[[gnu::weak]] extern uintptr_t __heap_start;
 } // extern "C"
 
 namespace {
+constexpr uint64_t PAGE_TABLE_ENTRY_COUNT = 512;
+constexpr uint64_t PAGE_TABLE_ALIGNMENT = 4096;
+
+// Put the page table in a no-init section so it doesn't later get
+// zero-initialized.
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
+volatile uint64_t page_table[PAGE_TABLE_ENTRY_COUNT];
+
+uintptr_t get_stackheap_start() {
+  if (reinterpret_cast<uintptr_t>(&__heap_start))
+    return reinterpret_cast<uintptr_t>(&__heap_start);
+
+  uintptr_t page = reinterpret_cast<uintptr_t>(&get_stackheap_start) >> 30;
+  return (page + 1) << 30;
+}
+
+void setup_mmu() {
+  constexpr uint64_t PAGE_SHIFT = 30;
+  constexpr uint64_t PAGE_TABLE_ENTRY = 0x405; // Index = 1, AF=1.
+  // Map the stack/heap as normal memory, but mark it non-executable for both
+  // privileged and unprivileged execution. This prevents accidentally executing
+  // code from writable stack/heap memory.
+  constexpr uint64_t PAGE_TABLE_ENTRY_XN =
+      PAGE_TABLE_ENTRY | (1ULL << 54) | (1ULL << 53);
+
+  uintptr_t start_page = reinterpret_cast<uintptr_t>(&setup_mmu) >> PAGE_SHIFT;
+  uintptr_t stackheap_page = get_stackheap_start() >> PAGE_SHIFT;
+
+  __asm__ volatile("tlbi vmalle1");
+  __arm_wsr64("TTBR0_EL1", reinterpret_cast<uint64_t>(page_table));
+  __arm_wsr64("MAIR_EL1", 0x000000000000FF44); // Attr0 NC, Attr1 WB/WA/RA.
+  __arm_wsr64("TCR_EL1", 0x0000000080813519);
+  __isb(0xF);
+
+  for (uint64_t page = 0; page < PAGE_TABLE_ENTRY_COUNT; ++page)
+    page_table[page] = 0;
+
+  page_table[start_page] = PAGE_TABLE_ENTRY | (start_page << PAGE_SHIFT);
+  if (start_page != stackheap_page)
+    page_table[stackheap_page] =
+        PAGE_TABLE_ENTRY_XN | (stackheap_page << PAGE_SHIFT);
+
+  __dsb(0xF);
+
+  uint64_t sctlr = __arm_rsr64("SCTLR_EL1");
+#ifdef __ARM_FEATURE_UNALIGNED
+  sctlr &= ~(1ULL << 1); // SCTLR_EL1.A: disable alignment checks.
+#else
+  sctlr |= 1ULL << 1; // SCTLR_EL1.A: enable alignment checks.
+#endif
+  sctlr &= ~(1ULL << 19); // SCTLR.WXN: keep the image executable.
+  sctlr |= 1ULL << 0;     // SCTLR.M: enable MMU.
+  __arm_wsr64("SCTLR_EL1", sctlr);
+  __isb(0xF);
+}
+
 // The Arm ARM for the A-profile architecture (D14.1.5) defines the exceptions.
 // However, for simplicity, we don't bother logging, and just exit.
 void GenericException_Handler() { LIBC_NAMESPACE::exit(1); }
@@ -80,6 +137,8 @@ namespace LIBC_NAMESPACE_DECL {
   // Set up exception handling
   __arm_wsr64("VBAR_EL1", reinterpret_cast<uint64_t>(&vector_table));
 
+  setup_mmu();
+
 #ifdef __ARM_FP
   // Do not trap FP/SME/SVE instructions
   static constexpr uint64_t CPACR_SHIFT_FPEN = 20;
diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp
index db89828a0b45e..1caff934df036 100644
--- a/libc/startup/baremetal/arm/start.cpp
+++ b/libc/startup/baremetal/arm/start.cpp
@@ -35,6 +35,63 @@ extern uintptr_t __bss_size[];
 } // extern "C"
 
 namespace {
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) &&              \
+    __ARM_ARCH >= 7
+constexpr uint32_t PAGE_TABLE_ENTRY_COUNT = 4096;
+constexpr uint32_t PAGE_TABLE_ALIGNMENT = 16384;
+
+// Put the page table in a no-init section so it doesn't later get
+// zero-initialized.
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
+volatile uint32_t page_table[PAGE_TABLE_ENTRY_COUNT];
+
+void setup_mmu() {
+  constexpr uint32_t PAGE_SHIFT = 20;
+
+  // Fill the page table with a flat mapping of 4096 1MB sections with all
+  // sections marked as normal.
+  //  base address = bits 20:31
+  //  bits 18:19 set to 0
+  //  nG = bit 17 set to 0 (global)
+  //  S = bit 16 set to 0 (non-shared)
+  //  APX = bit 15 set to 0 (full read/write)
+  //  TEX = bits 12:14 = b111 (normal)
+  //  AP = bits 10:11 set to b11 (full read/write)
+  //  P = bit 9 set to 0 (no ECC)
+  //  domain = bits 5:8 = b000
+  //  XN = bit 4 set to 0
+  //  C, B bits = bits 2:3 set to b11 (normal)
+  //  size = 1MB = bits 0:1 set to b10
+  constexpr uint32_t PAGE_TABLE_ENTRY = 0x7c0e;
+
+  uint32_t value = 3;
+  __arm_wsr("p15:0:c3:c0:0", value); // DACR: manager access to domain 0.
+  value = 0;
+  __arm_wsr("p15:0:c2:c0:2", value); // TTBCR: always use TTBR0.
+  value = reinterpret_cast<uint32_t>(page_table) | 1;
+  __arm_wsr("p15:0:c2:c0:0", value); // TTBR0: inner-cacheable walks.
+  __isb(0xF);
+
+  for (uint32_t page = 0; page < PAGE_TABLE_ENTRY_COUNT; ++page)
+    page_table[page] = PAGE_TABLE_ENTRY | (page << PAGE_SHIFT);
+
+  __dsb(0xF);
+
+  uint32_t sctlr = __arm_rsr("p15:0:c1:c0:0");
+#ifdef __ARM_FEATURE_UNALIGNED
+  sctlr &= ~(1 << 1); // SCTLR.A: disable alignment checks.
+  sctlr |= 1 << 22;   // SCTLR.U: enable unaligned access support.
+#else
+  sctlr |= 1 << 1; // SCTLR.A: enable alignment checks.
+#endif
+  sctlr |= 1 << 0;  // SCTLR.M: enable MMU.
+  sctlr |= 1 << 2;  // SCTLR.C: enable data cache.
+  sctlr |= 1 << 12; // SCTLR.I: enable instruction cache.
+  __arm_wsr("p15:0:c1:c0:0", sctlr);
+  __isb(0xF);
+}
+#endif
+
 #if __ARM_ARCH_PROFILE == 'M'
 // Based on
 // https://developer.arm.com/documentation/107565/0101/Use-case-examples/Generic-Information/What-is-inside-a-program-image-/Vector-table
@@ -131,6 +188,17 @@ namespace LIBC_NAMESPACE_DECL {
   __arm_wsr("CPSR_c", 0x13); // SVC
 #endif
 
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) &&              \
+    __ARM_ARCH >= 7
+  __arm_wsr("p15:0:c12:c0:0", reinterpret_cast<uint32_t>(&vector_table));
+  setup_mmu();
+#endif
+
+#if __ARM_ARCH_PROFILE == 'M' && !defined(__ARM_FEATURE_UNALIGNED)
+  auto ccr = reinterpret_cast<volatile uint32_t *const>(0xE000ED14);
+  *ccr |= 1 << 3; // CCR.UNALIGN_TRP: trap unaligned accesses.
+#endif
+
 #if __ARM_ARCH_PROFILE == 'M' &&                                               \
     (defined(__ARM_FP) || defined(__ARM_FEATURE_MVE))
   // Enable FPU and MVE. They can't be enabled independently: the two are

>From a0326f3704da54fca8bf194053e870087773bc5d Mon Sep 17 00:00:00 2001
From: Simi Pallipurath <simi.pallipurath at arm.com>
Date: Mon, 22 Jun 2026 23:09:28 +0100
Subject: [PATCH 2/3] fixup! [LIBC][ARM] Enable MMU setup and alignment fault
 handling during startup.

1. Applied clang-format
2. Made the page block shift value a constexpr variable for better readability.
3. Added necessary comments as requested.
---
 libc/startup/baremetal/aarch64/start.cpp | 43 +++++++++++++++++++-----
 libc/startup/baremetal/arm/start.cpp     | 10 +++---
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/libc/startup/baremetal/aarch64/start.cpp b/libc/startup/baremetal/aarch64/start.cpp
index fbd565114b6af..e3cef1ec146cd 100644
--- a/libc/startup/baremetal/aarch64/start.cpp
+++ b/libc/startup/baremetal/aarch64/start.cpp
@@ -38,22 +38,41 @@ extern uintptr_t __bss_size[];
 namespace {
 constexpr uint64_t PAGE_TABLE_ENTRY_COUNT = 512;
 constexpr uint64_t PAGE_TABLE_ALIGNMENT = 4096;
+constexpr uint64_t PAGE_TABLE_BLOCK_SHIFT = 30; // 1 GiB block entries.
 
 // Put the page table in a no-init section so it doesn't later get
 // zero-initialized.
-[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
-volatile uint64_t page_table[PAGE_TABLE_ENTRY_COUNT];
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT),
+  gnu::used]] volatile uint64_t page_table[PAGE_TABLE_ENTRY_COUNT];
 
+// Return the base address of the combined stack/heap mapping used by
+// setup_mmu().
 uintptr_t get_stackheap_start() {
+  // __heap_start is weak. If no linker script defines it, its address resolves
+  // to zero; otherwise the symbol's address is the requested heap base.
   if (reinterpret_cast<uintptr_t>(&__heap_start))
     return reinterpret_cast<uintptr_t>(&__heap_start);
 
-  uintptr_t page = reinterpret_cast<uintptr_t>(&get_stackheap_start) >> 30;
-  return (page + 1) << 30;
+  // With no linker-provided heap base, choose the 1 GiB page after this startup
+  // code as the fallback stack/heap page. The page table maps memory in 1 GiB
+  // blocks, so the shifts below convert between addresses and 1 GiB page
+  // numbers:
+  //
+  //   address >> PAGE_TABLE_BLOCK_SHIFT  gives the page number
+  //   page << PAGE_TABLE_BLOCK_SHIFT     gives the page base address
+  //
+  // Choosing page + 1 reserves the next 1 GiB page after the executable image
+  // for writable stack/heap memory.
+  uintptr_t code_page = reinterpret_cast<uintptr_t>(&get_stackheap_start) >>
+                        PAGE_TABLE_BLOCK_SHIFT;
+
+  // code_page is the page containing this function. Use the next page for
+  // stack/heap, then convert that page number back to an address to return.
+  uintptr_t stackheap_address = (code_page + 1) << PAGE_TABLE_BLOCK_SHIFT;
+  return stackheap_address;
 }
 
 void setup_mmu() {
-  constexpr uint64_t PAGE_SHIFT = 30;
   constexpr uint64_t PAGE_TABLE_ENTRY = 0x405; // Index = 1, AF=1.
   // Map the stack/heap as normal memory, but mark it non-executable for both
   // privileged and unprivileged execution. This prevents accidentally executing
@@ -61,8 +80,13 @@ void setup_mmu() {
   constexpr uint64_t PAGE_TABLE_ENTRY_XN =
       PAGE_TABLE_ENTRY | (1ULL << 54) | (1ULL << 53);
 
-  uintptr_t start_page = reinterpret_cast<uintptr_t>(&setup_mmu) >> PAGE_SHIFT;
-  uintptr_t stackheap_page = get_stackheap_start() >> PAGE_SHIFT;
+  uintptr_t start_page =
+      reinterpret_cast<uintptr_t>(&setup_mmu) >> PAGE_TABLE_BLOCK_SHIFT;
+
+  // get_stackheap_start() returns the base address of the combined stack/heap
+  // region, for example 0x80000000. The page table needs an index, for example
+  // 2, so convert the address to a page number.
+  uintptr_t stackheap_page = get_stackheap_start() >> PAGE_TABLE_BLOCK_SHIFT;
 
   __asm__ volatile("tlbi vmalle1");
   __arm_wsr64("TTBR0_EL1", reinterpret_cast<uint64_t>(page_table));
@@ -73,10 +97,11 @@ void setup_mmu() {
   for (uint64_t page = 0; page < PAGE_TABLE_ENTRY_COUNT; ++page)
     page_table[page] = 0;
 
-  page_table[start_page] = PAGE_TABLE_ENTRY | (start_page << PAGE_SHIFT);
+  page_table[start_page] =
+      PAGE_TABLE_ENTRY | (start_page << PAGE_TABLE_BLOCK_SHIFT);
   if (start_page != stackheap_page)
     page_table[stackheap_page] =
-        PAGE_TABLE_ENTRY_XN | (stackheap_page << PAGE_SHIFT);
+        PAGE_TABLE_ENTRY_XN | (stackheap_page << PAGE_TABLE_BLOCK_SHIFT);
 
   __dsb(0xF);
 
diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp
index 1caff934df036..42e883175c8f3 100644
--- a/libc/startup/baremetal/arm/start.cpp
+++ b/libc/startup/baremetal/arm/start.cpp
@@ -35,15 +35,14 @@ extern uintptr_t __bss_size[];
 } // extern "C"
 
 namespace {
-#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) &&              \
-    __ARM_ARCH >= 7
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && __ARM_ARCH >= 7
 constexpr uint32_t PAGE_TABLE_ENTRY_COUNT = 4096;
 constexpr uint32_t PAGE_TABLE_ALIGNMENT = 16384;
 
 // Put the page table in a no-init section so it doesn't later get
 // zero-initialized.
-[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
-volatile uint32_t page_table[PAGE_TABLE_ENTRY_COUNT];
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT),
+  gnu::used]] volatile uint32_t page_table[PAGE_TABLE_ENTRY_COUNT];
 
 void setup_mmu() {
   constexpr uint32_t PAGE_SHIFT = 20;
@@ -188,8 +187,7 @@ namespace LIBC_NAMESPACE_DECL {
   __arm_wsr("CPSR_c", 0x13); // SVC
 #endif
 
-#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) &&              \
-    __ARM_ARCH >= 7
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && __ARM_ARCH >= 7
   __arm_wsr("p15:0:c12:c0:0", reinterpret_cast<uint32_t>(&vector_table));
   setup_mmu();
 #endif

>From a65a09cc09f58c75054f4efce6d10f80d99e4071 Mon Sep 17 00:00:00 2001
From: Simi Pallipurath <simi.pallipurath at arm.com>
Date: Wed, 24 Jun 2026 21:39:22 +0100
Subject: [PATCH 3/3] fixup! fixup! [LIBC][ARM] Enable MMU setup and alignment
 fault handling during startup.

Updated to use the _end linker symbol as the AArch64
baremetal heap mapping base, matching the freelist
heap convention.
---
 libc/startup/baremetal/aarch64/start.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/startup/baremetal/aarch64/start.cpp b/libc/startup/baremetal/aarch64/start.cpp
index e3cef1ec146cd..bf145842d7683 100644
--- a/libc/startup/baremetal/aarch64/start.cpp
+++ b/libc/startup/baremetal/aarch64/start.cpp
@@ -32,7 +32,7 @@ extern uintptr_t __data_start[];
 extern uintptr_t __data_size[];
 extern uintptr_t __bss_start[];
 extern uintptr_t __bss_size[];
-[[gnu::weak]] extern uintptr_t __heap_start;
+[[gnu::weak]] extern uintptr_t _end;
 } // extern "C"
 
 namespace {
@@ -48,12 +48,12 @@ constexpr uint64_t PAGE_TABLE_BLOCK_SHIFT = 30; // 1 GiB block entries.
 // Return the base address of the combined stack/heap mapping used by
 // setup_mmu().
 uintptr_t get_stackheap_start() {
-  // __heap_start is weak. If no linker script defines it, its address resolves
-  // to zero; otherwise the symbol's address is the requested heap base.
-  if (reinterpret_cast<uintptr_t>(&__heap_start))
-    return reinterpret_cast<uintptr_t>(&__heap_start);
+  // _end is the heap start used by the baremetal freelist heap. If no linker
+  // script defines it, its weak address resolves to zero.
+  if (reinterpret_cast<uintptr_t>(&_end))
+    return reinterpret_cast<uintptr_t>(&_end);
 
-  // With no linker-provided heap base, choose the 1 GiB page after this startup
+  // With no linker-provided heap start, choose the 1 GiB page after this startup
   // code as the fallback stack/heap page. The page table maps memory in 1 GiB
   // blocks, so the shifts below convert between addresses and 1 GiB page
   // numbers:



More information about the libc-commits mailing list