[compiler-rt] [msan] Mark allocator padding as uninitialized, with new origin tag (PR #157187)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 5 15:03:20 PDT 2025
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/157187
>From ed4784006c9089f9451f72c877e375b2278050b4 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 5 Sep 2025 21:33:01 +0000
Subject: [PATCH 1/2] [msan] Mark allocator padding as uninitialized, with new
origin tag
This is follow-up work per discussion in https://github.com/llvm/llvm-project/pull/155944#discussion_r2311688571.
If the allocator reserves more space than the user requested (e.g.,
malloc(7) actually has 16 bytes reserved), the padding bytes will now be
marked as uninitialized; the origin will be set as a new tag,
ALLOC_PADDING (in the case of ambiguity caused by origin
granularity, ALLOC will take precedence).
Padding poisoning is controlled by the existing flag poison_in_malloc and a new flag,
poison_in_calloc.
---
compiler-rt/lib/msan/msan.h | 1 +
compiler-rt/lib/msan/msan_allocator.cpp | 31 ++++++++---
compiler-rt/lib/msan/msan_flags.inc | 1 +
compiler-rt/lib/msan/msan_report.cpp | 5 ++
compiler-rt/test/msan/allocator_padding.cpp | 59 +++++++++++++++++++++
compiler-rt/test/msan/zero_alloc.cpp | 8 ++-
6 files changed, 97 insertions(+), 8 deletions(-)
create mode 100644 compiler-rt/test/msan/allocator_padding.cpp
diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 7fb58be67a02c..edb26997be07d 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack);
const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1;
const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1;
+const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1;
#define GET_MALLOC_STACK_TRACE \
UNINITIALIZED BufferedStackTrace stack; \
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 64df863839c06..1ee685547de54 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -217,13 +217,35 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
}
auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
meta->requested_size = size;
+ uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
+ void* padding_start =
+ reinterpret_cast<void*>(reinterpret_cast<uptr>(allocated) + size);
+ uptr padding_size = actually_allocated_size - size;
+
+ // Origins have 4-byte granularity. Set the TAG_ALLOC_PADDING origin first,
+ // so the TAG_ALLOC origin will take precedence if necessary e.g.,
+ // - if we have malloc(7) that actually takes up 16 bytes, bytes 0-7 will
+ // have TAG_ALLOC and bytes 8-15 will have TAG_ALLOC_PADDING.
+ // - with calloc, bytes 4-15 will have TAG_ALLOC_PADDING (but bytes 4-6 are
+ // initialized, hence the origin is unused). Note that this means, unlike
+ // with malloc, byte 7 is TAG_ALLOC_PADDING instead of TAG_ALLOC.
+ if (__msan_get_track_origins() && ((zero && flags()->poison_in_calloc) ||
+ (!zero && flags()->poison_in_malloc))) {
+ stack->tag = STACK_TRACE_TAG_ALLOC_PADDING;
+ Origin o2 = Origin::CreateHeapOrigin(stack);
+ __msan_set_origin(padding_start, padding_size, o2.raw_id());
+ }
+
if (zero) {
if (allocator.FromPrimary(allocated))
__msan_clear_and_unpoison(allocated, size);
else
__msan_unpoison(allocated, size); // Mem is already zeroed.
+
+ if (flags()->poison_in_calloc)
+ __msan_poison(padding_start, padding_size);
} else if (flags()->poison_in_malloc) {
- __msan_poison(allocated, size);
+ __msan_poison(allocated, actually_allocated_size);
if (__msan_get_track_origins()) {
stack->tag = StackTrace::TAG_ALLOC;
Origin o = Origin::CreateHeapOrigin(stack);
@@ -231,11 +253,6 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
}
}
- uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
- // For compatibility, the allocator converted 0-sized allocations into 1 byte
- if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc)
- __msan_poison(allocated, 1);
-
UnpoisonParam(2);
RunMallocHooks(allocated, size);
return allocated;
@@ -247,7 +264,7 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) {
RunFreeHooks(p);
Metadata *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p));
- uptr size = meta->requested_size;
+ uptr size = allocator.GetActuallyAllocatedSize(p);
meta->requested_size = 0;
// This memory will not be reused by anyone else, so we are free to keep it
// poisoned. The secondary allocator will unmap and unpoison by
diff --git a/compiler-rt/lib/msan/msan_flags.inc b/compiler-rt/lib/msan/msan_flags.inc
index 16db26bd42ed9..2d600e79e2337 100644
--- a/compiler-rt/lib/msan/msan_flags.inc
+++ b/compiler-rt/lib/msan/msan_flags.inc
@@ -22,6 +22,7 @@ MSAN_FLAG(int, origin_history_size, Origin::kMaxDepth, "")
MSAN_FLAG(int, origin_history_per_stack_limit, 20000, "")
MSAN_FLAG(bool, poison_heap_with_zeroes, false, "")
MSAN_FLAG(bool, poison_stack_with_zeroes, false, "")
+MSAN_FLAG(bool, poison_in_calloc, true, "")
MSAN_FLAG(bool, poison_in_malloc, true, "")
MSAN_FLAG(bool, poison_in_free, true, "")
MSAN_FLAG(bool, poison_in_dtor, true, "")
diff --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp
index 99bf81f66dc9e..f19e20a11efc5 100644
--- a/compiler-rt/lib/msan/msan_report.cpp
+++ b/compiler-rt/lib/msan/msan_report.cpp
@@ -90,6 +90,11 @@ static void DescribeOrigin(u32 id) {
Printf(" %sVirtual table ptr was destroyed%s\n", d.Origin(),
d.Default());
break;
+ case STACK_TRACE_TAG_ALLOC_PADDING:
+ Printf(
+ " %sUninitialized value was created by heap allocator padding%s\n",
+ d.Origin(), d.Default());
+ break;
default:
Printf(" %sUninitialized value was created%s\n", d.Origin(),
d.Default());
diff --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp
new file mode 100644
index 0000000000000..a5db28a463297
--- /dev/null
+++ b/compiler-rt/test/msan/allocator_padding.cpp
@@ -0,0 +1,59 @@
+// malloc: all bytes are uninitialized
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes
+// 7-15 are padding.
+// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING,
+// ALLOC takes precedence
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// Bytes 8-15 are tagged as ALLOC_PADDING.
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+// calloc
+// Bytes 0-6 are fully initialized, so no MSan report should happen.
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+//
+// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING
+// (since the origin does not need to track bytes 4-6).
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// As with malloc, Bytes 8-15 are tagged as ALLOC_PADDING.
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+#ifdef USE_CALLOC
+ char *p = (char *)calloc(7, 1);
+#else
+ char *p = (char *)malloc(7);
+#endif
+
+ if (argc == 2) {
+ int index = atoi(argv[1]);
+
+ printf("p[%d] = %d\n", index, p[index]);
+ // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
+ // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]]
+ // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation
+ // ORIGIN-ALLOC-PADDING: Uninitialized value was created by heap allocator padding
+ free(p);
+ }
+
+ return 0;
+}
diff --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp
index 1451e1e89e9fb..f5d8d11c71697 100644
--- a/compiler-rt/test/msan/zero_alloc.cpp
+++ b/compiler-rt/test/msan/zero_alloc.cpp
@@ -1,4 +1,7 @@
-// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \
+// RUN: | FileCheck %s --check-prefixes=CHECK,ORIGINS
#include <stdio.h>
#include <stdlib.h>
@@ -10,6 +13,7 @@ int main(int argc, char **argv) {
printf("Content of p1 is: %d\n", *p1);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // ORIGINS: Uninitialized value was created by heap allocator padding
free(p1);
}
@@ -19,6 +23,7 @@ int main(int argc, char **argv) {
printf("Content of p2 is: %d\n", *p2);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // ORIGINS: Uninitialized value was created by heap allocator padding
free(p2);
}
@@ -28,6 +33,7 @@ int main(int argc, char **argv) {
printf("Content of p2 is: %d\n", *p3);
// CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
// CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+ // ORIGINS: Uninitialized value was created by heap allocator padding
free(p3);
}
>From 1b1e1b828e49c635f13c08b80076bea05859efc5 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Fri, 5 Sep 2025 22:02:55 +0000
Subject: [PATCH 2/2] Update comment
---
compiler-rt/lib/msan/msan_allocator.cpp | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 1ee685547de54..a657df2bdecf5 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -224,11 +224,13 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
// Origins have 4-byte granularity. Set the TAG_ALLOC_PADDING origin first,
// so the TAG_ALLOC origin will take precedence if necessary e.g.,
- // - if we have malloc(7) that actually takes up 16 bytes, bytes 0-7 will
- // have TAG_ALLOC and bytes 8-15 will have TAG_ALLOC_PADDING.
- // - with calloc, bytes 4-15 will have TAG_ALLOC_PADDING (but bytes 4-6 are
- // initialized, hence the origin is unused). Note that this means, unlike
- // with malloc, byte 7 is TAG_ALLOC_PADDING instead of TAG_ALLOC.
+ // - if we have malloc(7) that actually takes up 16 bytes:
+ // bytes 0-7: uninitialized, origin TAG_ALLOC
+ // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+ // - with calloc(7,1):
+ // bytes 0-6: initialized, origin not set (and irrelevant)
+ // byte 7: uninitialized, origin TAG_ALLOC_PADDING (unlike malloc)
+ // bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
if (__msan_get_track_origins() && ((zero && flags()->poison_in_calloc) ||
(!zero && flags()->poison_in_malloc))) {
stack->tag = STACK_TRACE_TAG_ALLOC_PADDING;
More information about the llvm-commits
mailing list