[compiler-rt] 6ae457d - [msan] Mark allocator padding as uninitialized, with new origin tag (#157187)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 14 15:48:09 PDT 2025


Author: Thurston Dang
Date: 2025-10-14T15:48:04-07:00
New Revision: 6ae457d0b2f5efa7032406239bd0684016c30d7b

URL: https://github.com/llvm/llvm-project/commit/6ae457d0b2f5efa7032406239bd0684016c30d7b
DIFF: https://github.com/llvm/llvm-project/commit/6ae457d0b2f5efa7032406239bd0684016c30d7b.diff

LOG: [msan] Mark allocator padding as uninitialized, with new origin tag (#157187)

This is follow-up work per discussion in
https://github.com/llvm/llvm-project/pull/155944#discussion_r2311688571.

If the allocator reserves more space than the user requested (e.g.,
`malloc(7)` and `calloc(7,1)` actually have 16 bytes reserved), the
padding bytes will now be marked as uninitialized.

Padding poisoning is controlled by the existing flag `poison_in_malloc`
(which applies to all allocation functions, not only malloc).

Origin tag:
- For `calloc` or with track-origins > 1, the origin will be set as a
new tag, `ALLOC_PADDING`
- Otherwise, the existing `ALLOC` tag will be used.
- In the case of ambiguity caused by origin granularity, `ALLOC` will
take precedence.

Added: 
    compiler-rt/test/msan/allocator_padding.cpp

Modified: 
    compiler-rt/lib/msan/msan.h
    compiler-rt/lib/msan/msan_allocator.cpp
    compiler-rt/lib/msan/msan_report.cpp
    compiler-rt/test/msan/zero_alloc.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h
index 7fb58be67a02c..edb26997be07d 100644
--- a/compiler-rt/lib/msan/msan.h
+++ b/compiler-rt/lib/msan/msan.h
@@ -303,6 +303,7 @@ u32 ChainOrigin(u32 id, StackTrace *stack);
 const int STACK_TRACE_TAG_POISON = StackTrace::TAG_CUSTOM + 1;
 const int STACK_TRACE_TAG_FIELDS = STACK_TRACE_TAG_POISON + 1;
 const int STACK_TRACE_TAG_VPTR = STACK_TRACE_TAG_FIELDS + 1;
+const int STACK_TRACE_TAG_ALLOC_PADDING = STACK_TRACE_TAG_VPTR + 1;
 
 #define GET_MALLOC_STACK_TRACE                                             \
   UNINITIALIZED BufferedStackTrace stack;                                  \

diff  --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index 64df863839c06..80608aa2bbca6 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -217,25 +217,52 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
   }
   auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
   meta->requested_size = size;
+  uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
+  void* padding_start = reinterpret_cast<char*>(allocated) + size;
+  uptr padding_size = actually_allocated_size - size;
+
+  // - With calloc(7,1), we can set the ideal tagging:
+  //     bytes 0-6:  initialized,   origin not set (and irrelevant)
+  //     byte  7:    uninitialized, origin TAG_ALLOC_PADDING
+  //     bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+  // - If we have malloc(7) and __msan_get_track_origins() > 1, the 4-byte
+  //   origin granularity only allows the slightly suboptimal tagging:
+  //     bytes 0-6:  uninitialized, origin TAG_ALLOC
+  //     byte  7:    uninitialized, origin TAG_ALLOC (suboptimal)
+  //     bytes 8-15: uninitialized, origin TAG_ALLOC_PADDING
+  // - If we have malloc(7) and __msan_get_track_origins() == 1, we use a
+  //   single origin bean to reduce overhead:
+  //     bytes 0-6:  uninitialized, origin TAG_ALLOC
+  //     byte  7:    uninitialized, origin TAG_ALLOC (suboptimal)
+  //     bytes 8-15: uninitialized, origin TAG_ALLOC (suboptimal)
+  if (__msan_get_track_origins() && flags()->poison_in_malloc &&
+      (zero || (__msan_get_track_origins() > 1))) {
+    stack->tag = STACK_TRACE_TAG_ALLOC_PADDING;
+    Origin o2 = Origin::CreateHeapOrigin(stack);
+    __msan_set_origin(padding_start, padding_size, o2.raw_id());
+  }
+
   if (zero) {
     if (allocator.FromPrimary(allocated))
       __msan_clear_and_unpoison(allocated, size);
     else
       __msan_unpoison(allocated, size);  // Mem is already zeroed.
+
+    if (flags()->poison_in_malloc)
+      __msan_poison(padding_start, padding_size);
   } else if (flags()->poison_in_malloc) {
-    __msan_poison(allocated, size);
+    __msan_poison(allocated, actually_allocated_size);
+
     if (__msan_get_track_origins()) {
       stack->tag = StackTrace::TAG_ALLOC;
       Origin o = Origin::CreateHeapOrigin(stack);
-      __msan_set_origin(allocated, size, o.raw_id());
+      __msan_set_origin(
+          allocated,
+          __msan_get_track_origins() == 1 ? actually_allocated_size : size,
+          o.raw_id());
     }
   }
 
-  uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(allocated);
-  // For compatibility, the allocator converted 0-sized allocations into 1 byte
-  if (size == 0 && actually_allocated_size > 0 && flags()->poison_in_malloc)
-    __msan_poison(allocated, 1);
-
   UnpoisonParam(2);
   RunMallocHooks(allocated, size);
   return allocated;
@@ -255,9 +282,10 @@ void __msan::MsanDeallocate(BufferedStackTrace *stack, void *p) {
   if (flags()->poison_in_free && allocator.FromPrimary(p)) {
     __msan_poison(p, size);
     if (__msan_get_track_origins()) {
+      uptr actually_allocated_size = allocator.GetActuallyAllocatedSize(p);
       stack->tag = StackTrace::TAG_DEALLOC;
       Origin o = Origin::CreateHeapOrigin(stack);
-      __msan_set_origin(p, size, o.raw_id());
+      __msan_set_origin(p, actually_allocated_size, o.raw_id());
     }
   }
   if (MsanThread *t = GetCurrentThread()) {

diff  --git a/compiler-rt/lib/msan/msan_report.cpp b/compiler-rt/lib/msan/msan_report.cpp
index 99bf81f66dc9e..cd0bf67234d79 100644
--- a/compiler-rt/lib/msan/msan_report.cpp
+++ b/compiler-rt/lib/msan/msan_report.cpp
@@ -90,6 +90,10 @@ static void DescribeOrigin(u32 id) {
         Printf("  %sVirtual table ptr was destroyed%s\n", d.Origin(),
                d.Default());
         break;
+      case STACK_TRACE_TAG_ALLOC_PADDING:
+        Printf("  %sUninitialized value is outside of heap allocation%s\n",
+               d.Origin(), d.Default());
+        break;
       default:
         Printf("  %sUninitialized value was created%s\n", d.Origin(),
                d.Default());

diff  --git a/compiler-rt/test/msan/allocator_padding.cpp b/compiler-rt/test/msan/allocator_padding.cpp
new file mode 100644
index 0000000000000..72acf31e61752
--- /dev/null
+++ b/compiler-rt/test/msan/allocator_padding.cpp
@@ -0,0 +1,94 @@
+// *** malloc: all bytes are uninitialized
+// * malloc byte 0
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 0 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 0 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// * malloc byte 6
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 6 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 6 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// This test assumes the allocator allocates 16 bytes for malloc(7). Bytes
+// 7-15 are padding.
+//
+// * malloc byte 7
+// Edge case: when the origin granularity spans both ALLOC and ALLOC_PADDING,
+//            ALLOC always takes precedence.
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+//
+// Bytes 8-15 are padding
+// For track-origins=1, ALLOC is used instead of ALLOC_PADDING.
+//
+// * malloc byte 8
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * malloc byte 15
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+// *** calloc
+// Bytes 0-6 are fully initialized, so no MSan report should happen.
+//
+// * calloc byte 0
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 0 2>&1
+//
+// * calloc byte 6
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && %run %t 6 2>&1
+//
+// * calloc byte 7
+// Byte 7 is uninitialized. Unlike malloc, this is tagged as ALLOC_PADDING
+// (since the origin does not need to track bytes 4-6).
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 7 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * calloc byte 8
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 8 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+//
+// * calloc byte 15
+// RUN: %clang_msan -fsanitize-memory-track-origins=1 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+// RUN: %clang_msan -fsanitize-memory-track-origins=2 -DUSE_CALLOC %s -o %t && not %run %t 15 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGIN-ALLOC-PADDING
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+#ifdef USE_CALLOC
+  char *p = (char *)calloc(7, 1);
+#else
+  char *p = (char *)malloc(7);
+#endif
+
+  if (argc == 2) {
+    int index = atoi(argv[1]);
+
+    printf("p[%d] = %d\n", index, p[index]);
+    // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
+    // CHECK: {{#0 0x.* in main .*allocator_padding.cpp:}}[[@LINE-2]]
+    // ORIGIN-ALLOC: Uninitialized value was created by a heap allocation
+    // ORIGIN-ALLOC-PADDING: Uninitialized value is outside of heap allocation
+    free(p);
+  }
+
+  return 0;
+}

diff  --git a/compiler-rt/test/msan/zero_alloc.cpp b/compiler-rt/test/msan/zero_alloc.cpp
index 1451e1e89e9fb..f4cf1d89c5b76 100644
--- a/compiler-rt/test/msan/zero_alloc.cpp
+++ b/compiler-rt/test/msan/zero_alloc.cpp
@@ -1,4 +1,9 @@
-// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory %s -o %t && not %run %t 2>&1 \
+// RUN:     | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=1 %s -o %t && not %run %t 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,DISCOUNT
+// RUN: %clang_msan -Wno-alloc-size -fsanitize-recover=memory -fsanitize-memory-track-origins=2 %s -o %t && not %run %t 2>&1 \
+// RUN:     | FileCheck %s --check-prefixes=CHECK,ORIGINS
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -10,6 +15,7 @@ int main(int argc, char **argv) {
     printf("Content of p1 is: %d\n", *p1);
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
     // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+    // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation
     free(p1);
   }
 
@@ -19,6 +25,7 @@ int main(int argc, char **argv) {
     printf("Content of p2 is: %d\n", *p2);
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
     // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+    // DISCOUNT,ORIGINS: Uninitialized value is outside of heap allocation
     free(p2);
   }
 
@@ -28,6 +35,8 @@ int main(int argc, char **argv) {
     printf("Content of p2 is: %d\n", *p3);
     // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
     // CHECK: {{#0 0x.* in main .*zero_alloc.cpp:}}[[@LINE-2]]
+    // DISCOUNT: Uninitialized value was created by a heap allocation
+    // ORIGINS: Uninitialized value is outside of heap allocation
     free(p3);
   }
 


        


More information about the llvm-commits mailing list