[libc-commits] [libc] [LIBC][ARM] Enable MMU setup and alignment fault handling during startup. (PR #204803)
Simi Pallipurath via libc-commits
libc-commits at lists.llvm.org
Thu Jun 25 02:54:36 PDT 2026
https://github.com/simpal01 updated https://github.com/llvm/llvm-project/pull/204803
>From 8842b43284cbe6263f474c7f567167c07e308a26 Mon Sep 17 00:00:00 2001
From: Simi Pallipurath <simi.pallipurath at arm.com>
Date: Thu, 18 Jun 2026 14:00:33 +0100
Subject: [PATCH 1/3] [LIBC][ARM] Enable MMU setup and alignment fault handling
during startup.
This change adds MMU initialization for Arm targets
and configures alignment fault behavior.
This patch:
1. Add MMU setup for AArch64 startup.
- Create a minimal flat-mapped translation table.
- Configure TTBR0_EL1, MAIR_EL1, and TCR_EL1.
- Mark stack/heap memory as execute-never (XN) when possible.
- Enable the MMU and configure alignment checking through SCTLR_EL1.
2. Add MMU setup for Arm A-profile (AArch32, Armv7-A and later) startup:
- Create a flat 1 MB section mapping covering the address space.
- Configure DACR, TTBCR, and TTBR0.
- Enable MMU, instruction cache, and data cache.
- Configure alignment fault handling through SCTLR.
3. Enable unaligned access trapping on Arm M-profile targets when
`__ARM_FEATURE_UNALIGNED` is not defined, by setting `CCR.UNALIGN_TRP`.
This allows libc startup code to provide consistent alignment
checking behavior across Arm architectures while enabling the MMU and
basic memory protection features on A-profile systems.
Assisted-by: codex, reviewed and cross checked, also tested with
ATfE(Arm Toolchain for Embedded), by me.
---
libc/startup/baremetal/aarch64/start.cpp | 59 ++++++++++++++++++++
libc/startup/baremetal/arm/start.cpp | 68 ++++++++++++++++++++++++
2 files changed, 127 insertions(+)
diff --git a/libc/startup/baremetal/aarch64/start.cpp b/libc/startup/baremetal/aarch64/start.cpp
index ff3ea933c5240..fbd565114b6af 100644
--- a/libc/startup/baremetal/aarch64/start.cpp
+++ b/libc/startup/baremetal/aarch64/start.cpp
@@ -32,9 +32,66 @@ extern uintptr_t __data_start[];
extern uintptr_t __data_size[];
extern uintptr_t __bss_start[];
extern uintptr_t __bss_size[];
+[[gnu::weak]] extern uintptr_t __heap_start;
} // extern "C"
namespace {
+constexpr uint64_t PAGE_TABLE_ENTRY_COUNT = 512;
+constexpr uint64_t PAGE_TABLE_ALIGNMENT = 4096;
+
+// Put the page table in a no-init section so it doesn't later get
+// zero-initialized.
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
+volatile uint64_t page_table[PAGE_TABLE_ENTRY_COUNT];
+
+uintptr_t get_stackheap_start() {
+ if (reinterpret_cast<uintptr_t>(&__heap_start))
+ return reinterpret_cast<uintptr_t>(&__heap_start);
+
+ uintptr_t page = reinterpret_cast<uintptr_t>(&get_stackheap_start) >> 30;
+ return (page + 1) << 30;
+}
+
+void setup_mmu() {
+ constexpr uint64_t PAGE_SHIFT = 30;
+ constexpr uint64_t PAGE_TABLE_ENTRY = 0x405; // Index = 1, AF=1.
+ // Map the stack/heap as normal memory, but mark it non-executable for both
+ // privileged and unprivileged execution. This prevents accidentally executing
+ // code from writable stack/heap memory.
+ constexpr uint64_t PAGE_TABLE_ENTRY_XN =
+ PAGE_TABLE_ENTRY | (1ULL << 54) | (1ULL << 53);
+
+ uintptr_t start_page = reinterpret_cast<uintptr_t>(&setup_mmu) >> PAGE_SHIFT;
+ uintptr_t stackheap_page = get_stackheap_start() >> PAGE_SHIFT;
+
+ __asm__ volatile("tlbi vmalle1");
+ __arm_wsr64("TTBR0_EL1", reinterpret_cast<uint64_t>(page_table));
+ __arm_wsr64("MAIR_EL1", 0x000000000000FF44); // Attr0 NC, Attr1 WB/WA/RA.
+ __arm_wsr64("TCR_EL1", 0x0000000080813519);
+ __isb(0xF);
+
+ for (uint64_t page = 0; page < PAGE_TABLE_ENTRY_COUNT; ++page)
+ page_table[page] = 0;
+
+ page_table[start_page] = PAGE_TABLE_ENTRY | (start_page << PAGE_SHIFT);
+ if (start_page != stackheap_page)
+ page_table[stackheap_page] =
+ PAGE_TABLE_ENTRY_XN | (stackheap_page << PAGE_SHIFT);
+
+ __dsb(0xF);
+
+ uint64_t sctlr = __arm_rsr64("SCTLR_EL1");
+#ifdef __ARM_FEATURE_UNALIGNED
+ sctlr &= ~(1ULL << 1); // SCTLR_EL1.A: disable alignment checks.
+#else
+ sctlr |= 1ULL << 1; // SCTLR_EL1.A: enable alignment checks.
+#endif
+ sctlr &= ~(1ULL << 19); // SCTLR.WXN: keep the image executable.
+ sctlr |= 1ULL << 0; // SCTLR.M: enable MMU.
+ __arm_wsr64("SCTLR_EL1", sctlr);
+ __isb(0xF);
+}
+
// The Arm ARM for the A-profile architecture (D14.1.5) defines the exceptions.
// However, for simplicity, we don't bother logging, and just exit.
void GenericException_Handler() { LIBC_NAMESPACE::exit(1); }
@@ -80,6 +137,8 @@ namespace LIBC_NAMESPACE_DECL {
// Set up exception handling
__arm_wsr64("VBAR_EL1", reinterpret_cast<uint64_t>(&vector_table));
+ setup_mmu();
+
#ifdef __ARM_FP
// Do not trap FP/SME/SVE instructions
static constexpr uint64_t CPACR_SHIFT_FPEN = 20;
diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp
index db89828a0b45e..1caff934df036 100644
--- a/libc/startup/baremetal/arm/start.cpp
+++ b/libc/startup/baremetal/arm/start.cpp
@@ -35,6 +35,63 @@ extern uintptr_t __bss_size[];
} // extern "C"
namespace {
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && \
+ __ARM_ARCH >= 7
+constexpr uint32_t PAGE_TABLE_ENTRY_COUNT = 4096;
+constexpr uint32_t PAGE_TABLE_ALIGNMENT = 16384;
+
+// Put the page table in a no-init section so it doesn't later get
+// zero-initialized.
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
+volatile uint32_t page_table[PAGE_TABLE_ENTRY_COUNT];
+
+void setup_mmu() {
+ constexpr uint32_t PAGE_SHIFT = 20;
+
+ // Fill the page table with a flat mapping of 4096 1MB sections with all
+ // sections marked as normal.
+ // base address = bits 20:31
+ // bits 18:19 set to 0
+ // nG = bit 17 set to 0 (global)
+ // S = bit 16 set to 0 (non-shared)
+ // APX = bit 15 set to 0 (full read/write)
+ // TEX = bits 12:14 = b111 (normal)
+ // AP = bits 10:11 set to b11 (full read/write)
+ // P = bit 9 set to 0 (no ECC)
+ // domain = bits 5:8 = b000
+ // XN = bit 4 set to 0
+ // C, B bits = bits 2:3 set to b11 (normal)
+ // size = 1MB = bits 0:1 set to b10
+ constexpr uint32_t PAGE_TABLE_ENTRY = 0x7c0e;
+
+ uint32_t value = 3;
+ __arm_wsr("p15:0:c3:c0:0", value); // DACR: manager access to domain 0.
+ value = 0;
+ __arm_wsr("p15:0:c2:c0:2", value); // TTBCR: always use TTBR0.
+ value = reinterpret_cast<uint32_t>(page_table) | 1;
+ __arm_wsr("p15:0:c2:c0:0", value); // TTBR0: inner-cacheable walks.
+ __isb(0xF);
+
+ for (uint32_t page = 0; page < PAGE_TABLE_ENTRY_COUNT; ++page)
+ page_table[page] = PAGE_TABLE_ENTRY | (page << PAGE_SHIFT);
+
+ __dsb(0xF);
+
+ uint32_t sctlr = __arm_rsr("p15:0:c1:c0:0");
+#ifdef __ARM_FEATURE_UNALIGNED
+ sctlr &= ~(1 << 1); // SCTLR.A: disable alignment checks.
+ sctlr |= 1 << 22; // SCTLR.U: enable unaligned access support.
+#else
+ sctlr |= 1 << 1; // SCTLR.A: enable alignment checks.
+#endif
+ sctlr |= 1 << 0; // SCTLR.M: enable MMU.
+ sctlr |= 1 << 2; // SCTLR.C: enable data cache.
+ sctlr |= 1 << 12; // SCTLR.I: enable instruction cache.
+ __arm_wsr("p15:0:c1:c0:0", sctlr);
+ __isb(0xF);
+}
+#endif
+
#if __ARM_ARCH_PROFILE == 'M'
// Based on
// https://developer.arm.com/documentation/107565/0101/Use-case-examples/Generic-Information/What-is-inside-a-program-image-/Vector-table
@@ -131,6 +188,17 @@ namespace LIBC_NAMESPACE_DECL {
__arm_wsr("CPSR_c", 0x13); // SVC
#endif
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && \
+ __ARM_ARCH >= 7
+ __arm_wsr("p15:0:c12:c0:0", reinterpret_cast<uint32_t>(&vector_table));
+ setup_mmu();
+#endif
+
+#if __ARM_ARCH_PROFILE == 'M' && !defined(__ARM_FEATURE_UNALIGNED)
+ auto ccr = reinterpret_cast<volatile uint32_t *const>(0xE000ED14);
+ *ccr |= 1 << 3; // CCR.UNALIGN_TRP: trap unaligned accesses.
+#endif
+
#if __ARM_ARCH_PROFILE == 'M' && \
(defined(__ARM_FP) || defined(__ARM_FEATURE_MVE))
// Enable FPU and MVE. They can't be enabled independently: the two are
>From a0326f3704da54fca8bf194053e870087773bc5d Mon Sep 17 00:00:00 2001
From: Simi Pallipurath <simi.pallipurath at arm.com>
Date: Mon, 22 Jun 2026 23:09:28 +0100
Subject: [PATCH 2/3] fixup! [LIBC][ARM] Enable MMU setup and alignment fault
handling during startup.
1. Applied clang-format
2. Made the page block shift value a constexpr variable for better readability.
3. Added necessary comments as requested.
---
libc/startup/baremetal/aarch64/start.cpp | 43 +++++++++++++++++++-----
libc/startup/baremetal/arm/start.cpp | 10 +++---
2 files changed, 38 insertions(+), 15 deletions(-)
diff --git a/libc/startup/baremetal/aarch64/start.cpp b/libc/startup/baremetal/aarch64/start.cpp
index fbd565114b6af..e3cef1ec146cd 100644
--- a/libc/startup/baremetal/aarch64/start.cpp
+++ b/libc/startup/baremetal/aarch64/start.cpp
@@ -38,22 +38,41 @@ extern uintptr_t __bss_size[];
namespace {
constexpr uint64_t PAGE_TABLE_ENTRY_COUNT = 512;
constexpr uint64_t PAGE_TABLE_ALIGNMENT = 4096;
+constexpr uint64_t PAGE_TABLE_BLOCK_SHIFT = 30; // 1 GiB block entries.
// Put the page table in a no-init section so it doesn't later get
// zero-initialized.
-[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
-volatile uint64_t page_table[PAGE_TABLE_ENTRY_COUNT];
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT),
+ gnu::used]] volatile uint64_t page_table[PAGE_TABLE_ENTRY_COUNT];
+// Return the base address of the combined stack/heap mapping used by
+// setup_mmu().
uintptr_t get_stackheap_start() {
+ // __heap_start is weak. If no linker script defines it, its address resolves
+ // to zero; otherwise the symbol's address is the requested heap base.
if (reinterpret_cast<uintptr_t>(&__heap_start))
return reinterpret_cast<uintptr_t>(&__heap_start);
- uintptr_t page = reinterpret_cast<uintptr_t>(&get_stackheap_start) >> 30;
- return (page + 1) << 30;
+ // With no linker-provided heap base, choose the 1 GiB page after this startup
+ // code as the fallback stack/heap page. The page table maps memory in 1 GiB
+ // blocks, so the shifts below convert between addresses and 1 GiB page
+ // numbers:
+ //
+ // address >> PAGE_TABLE_BLOCK_SHIFT gives the page number
+ // page << PAGE_TABLE_BLOCK_SHIFT gives the page base address
+ //
+ // Choosing page + 1 reserves the next 1 GiB page after the executable image
+ // for writable stack/heap memory.
+ uintptr_t code_page = reinterpret_cast<uintptr_t>(&get_stackheap_start) >>
+ PAGE_TABLE_BLOCK_SHIFT;
+
+ // code_page is the page containing this function. Use the next page for
+ // stack/heap, then convert that page number back to an address to return.
+ uintptr_t stackheap_address = (code_page + 1) << PAGE_TABLE_BLOCK_SHIFT;
+ return stackheap_address;
}
void setup_mmu() {
- constexpr uint64_t PAGE_SHIFT = 30;
constexpr uint64_t PAGE_TABLE_ENTRY = 0x405; // Index = 1, AF=1.
// Map the stack/heap as normal memory, but mark it non-executable for both
// privileged and unprivileged execution. This prevents accidentally executing
@@ -61,8 +80,13 @@ void setup_mmu() {
constexpr uint64_t PAGE_TABLE_ENTRY_XN =
PAGE_TABLE_ENTRY | (1ULL << 54) | (1ULL << 53);
- uintptr_t start_page = reinterpret_cast<uintptr_t>(&setup_mmu) >> PAGE_SHIFT;
- uintptr_t stackheap_page = get_stackheap_start() >> PAGE_SHIFT;
+ uintptr_t start_page =
+ reinterpret_cast<uintptr_t>(&setup_mmu) >> PAGE_TABLE_BLOCK_SHIFT;
+
+ // get_stackheap_start() returns the base address of the combined stack/heap
+ // region, for example 0x80000000. The page table needs an index, for example
+ // 2, so convert the address to a page number.
+ uintptr_t stackheap_page = get_stackheap_start() >> PAGE_TABLE_BLOCK_SHIFT;
__asm__ volatile("tlbi vmalle1");
__arm_wsr64("TTBR0_EL1", reinterpret_cast<uint64_t>(page_table));
@@ -73,10 +97,11 @@ void setup_mmu() {
for (uint64_t page = 0; page < PAGE_TABLE_ENTRY_COUNT; ++page)
page_table[page] = 0;
- page_table[start_page] = PAGE_TABLE_ENTRY | (start_page << PAGE_SHIFT);
+ page_table[start_page] =
+ PAGE_TABLE_ENTRY | (start_page << PAGE_TABLE_BLOCK_SHIFT);
if (start_page != stackheap_page)
page_table[stackheap_page] =
- PAGE_TABLE_ENTRY_XN | (stackheap_page << PAGE_SHIFT);
+ PAGE_TABLE_ENTRY_XN | (stackheap_page << PAGE_TABLE_BLOCK_SHIFT);
__dsb(0xF);
diff --git a/libc/startup/baremetal/arm/start.cpp b/libc/startup/baremetal/arm/start.cpp
index 1caff934df036..42e883175c8f3 100644
--- a/libc/startup/baremetal/arm/start.cpp
+++ b/libc/startup/baremetal/arm/start.cpp
@@ -35,15 +35,14 @@ extern uintptr_t __bss_size[];
} // extern "C"
namespace {
-#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && \
- __ARM_ARCH >= 7
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && __ARM_ARCH >= 7
constexpr uint32_t PAGE_TABLE_ENTRY_COUNT = 4096;
constexpr uint32_t PAGE_TABLE_ALIGNMENT = 16384;
// Put the page table in a no-init section so it doesn't later get
// zero-initialized.
-[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT), gnu::used]]
-volatile uint32_t page_table[PAGE_TABLE_ENTRY_COUNT];
+[[gnu::section(".noinit.page_table"), gnu::aligned(PAGE_TABLE_ALIGNMENT),
+ gnu::used]] volatile uint32_t page_table[PAGE_TABLE_ENTRY_COUNT];
void setup_mmu() {
constexpr uint32_t PAGE_SHIFT = 20;
@@ -188,8 +187,7 @@ namespace LIBC_NAMESPACE_DECL {
__arm_wsr("CPSR_c", 0x13); // SVC
#endif
-#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && \
- __ARM_ARCH >= 7
+#if __ARM_ARCH_PROFILE == 'A' && !defined(__ARM_ARCH_ISA_A64) && __ARM_ARCH >= 7
__arm_wsr("p15:0:c12:c0:0", reinterpret_cast<uint32_t>(&vector_table));
setup_mmu();
#endif
>From a65a09cc09f58c75054f4efce6d10f80d99e4071 Mon Sep 17 00:00:00 2001
From: Simi Pallipurath <simi.pallipurath at arm.com>
Date: Wed, 24 Jun 2026 21:39:22 +0100
Subject: [PATCH 3/3] fixup! fixup! [LIBC][ARM] Enable MMU setup and alignment
fault handling during startup.
Updated to use the _end linker symbol as the AArch64
baremetal heap mapping base, matching the freelist
heap convention.
---
libc/startup/baremetal/aarch64/start.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/libc/startup/baremetal/aarch64/start.cpp b/libc/startup/baremetal/aarch64/start.cpp
index e3cef1ec146cd..bf145842d7683 100644
--- a/libc/startup/baremetal/aarch64/start.cpp
+++ b/libc/startup/baremetal/aarch64/start.cpp
@@ -32,7 +32,7 @@ extern uintptr_t __data_start[];
extern uintptr_t __data_size[];
extern uintptr_t __bss_start[];
extern uintptr_t __bss_size[];
-[[gnu::weak]] extern uintptr_t __heap_start;
+[[gnu::weak]] extern uintptr_t _end;
} // extern "C"
namespace {
@@ -48,12 +48,12 @@ constexpr uint64_t PAGE_TABLE_BLOCK_SHIFT = 30; // 1 GiB block entries.
// Return the base address of the combined stack/heap mapping used by
// setup_mmu().
uintptr_t get_stackheap_start() {
- // __heap_start is weak. If no linker script defines it, its address resolves
- // to zero; otherwise the symbol's address is the requested heap base.
- if (reinterpret_cast<uintptr_t>(&__heap_start))
- return reinterpret_cast<uintptr_t>(&__heap_start);
+ // _end is the heap start used by the baremetal freelist heap. If no linker
+ // script defines it, its weak address resolves to zero.
+ if (reinterpret_cast<uintptr_t>(&_end))
+ return reinterpret_cast<uintptr_t>(&_end);
- // With no linker-provided heap base, choose the 1 GiB page after this startup
+ // With no linker-provided heap start, choose the 1 GiB page after this startup
// code as the fallback stack/heap page. The page table maps memory in 1 GiB
// blocks, so the shifts below convert between addresses and 1 GiB page
// numbers:
More information about the libc-commits
mailing list