[compiler-rt] [dfsan] Re-exec with no ASLR if memory layout is incompatible on Linux (PR #85674)

Thurston Dang via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 18 16:31:50 PDT 2024


https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/85674

>From 271054a1feeaba3729dce1ad084d1a6e94d9d20d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Mon, 18 Mar 2024 17:41:12 +0000
Subject: [PATCH 1/2] [dfsan] Re-exec with no ASLR if memory layout is
 incompatible on Linux

DFSan's shadow mappings are incompatible with 32 bits of ASLR entropy (reproducible with 'sudo sysctl vm.mmap_rnd_bits=32; ninja check-dfsan'). It is difficult to fix this by increasing the size of the shadow mappings, due to the overhead of shadow memory. This patch works around the issue by detecting whether the memory layout is incompatible and, if so, re-exec'ing without ASLR.

DFSan and MSan share copy-pasted shadow memory code, hence this workaround is ported from MSan:
- "[msan] Re-exec with no ASLR if memory layout is incompatible on Linux" (https://github.com/llvm/llvm-project/commit/58f7251820b14c93168726a24816d8a094599be5)
- "[msan] Add 'MappingDesc::ALLOCATOR' type and check it is available" (https://github.com/llvm/llvm-project/commit/af2bf86a372cacf5f536bae06e2f2d3886eefb7b)
(which in turn are inspired by TSan: "Re-exec TSan with no ASLR if memory layout is incompatible on Linux" (https://github.com/llvm/llvm-project/commit/0784b1eefa36d4acbb0dacd2d18796e26313b6c5 ))

aeubanks had remarked in https://github.com/llvm/llvm-project/pull/85142#issuecomment-2004442883 that this issue occurs in Chromium: https://ci.chromium.org/ui/p/chromium/builders/try/linux_upload_clang/5066/overview
---
 compiler-rt/lib/dfsan/dfsan.cpp           | 66 ++++++++++++++++++-----
 compiler-rt/lib/dfsan/dfsan_allocator.cpp |  3 ++
 compiler-rt/lib/dfsan/dfsan_platform.h    | 24 ++++++---
 3 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index 5e85c8fda3e230..7d435fb140ea97 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -33,6 +33,9 @@
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_report_decorator.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
+#if SANITIZER_LINUX
+#  include <sys/personality.h>
+#endif
 
 using namespace __dfsan;
 
@@ -1127,11 +1130,12 @@ static void CheckMemoryLayoutSanity() {
 
 // TODO: CheckMemoryRangeAvailability is based on msan.
 // Consider refactoring these into a shared implementation.
-static bool CheckMemoryRangeAvailability(uptr beg, uptr size) {
+static bool CheckMemoryRangeAvailability(uptr beg, uptr size, bool verbose) {
   if (size > 0) {
     uptr end = beg + size - 1;
     if (!MemoryRangeIsAvailable(beg, end)) {
-      Printf("FATAL: Memory range %p - %p is not available.\n", beg, end);
+      if (verbose)
+        Printf("FATAL: Memory range %p - %p is not available.\n", beg, end);
       return false;
     }
   }
@@ -1163,7 +1167,7 @@ static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) {
 
 // TODO: InitShadow is based on msan.
 // Consider refactoring these into a shared implementation.
-bool InitShadow(bool init_origins) {
+bool InitShadow(bool init_origins, bool dry_run) {
   // Let user know mapping parameters first.
   VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init);
   for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
@@ -1173,8 +1177,9 @@ bool InitShadow(bool init_origins) {
   CheckMemoryLayoutSanity();
 
   if (!MEM_IS_APP(&__dfsan::dfsan_init)) {
-    Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
-           (uptr)&__dfsan::dfsan_init);
+    if (!dry_run)
+      Printf("FATAL: Code %p is out of application range. Non-PIE build?\n",
+             (uptr)&__dfsan::dfsan_init);
     return false;
   }
 
@@ -1195,20 +1200,26 @@ bool InitShadow(bool init_origins) {
     bool protect = type == MappingDesc::INVALID ||
                    (!init_origins && type == MappingDesc::ORIGIN);
     CHECK(!(map && protect));
-    if (!map && !protect)
-      CHECK(type == MappingDesc::APP);
+    if (!map && !protect) {
+      CHECK(type == MappingDesc::APP || type == MappingDesc::ALLOCATOR);
+
+      if (dry_run && type == MappingDesc::ALLOCATOR &&
+          !CheckMemoryRangeAvailability(start, size, !dry_run))
+        return false;
+    }
     if (map) {
-      if (!CheckMemoryRangeAvailability(start, size))
+      if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))
         return false;
-      if (!MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
+      if (!dry_run &&
+          !MmapFixedSuperNoReserve(start, size, kMemoryLayout[i].name))
         return false;
-      if (common_flags()->use_madv_dontdump)
+      if (!dry_run && common_flags()->use_madv_dontdump)
         DontDumpShadowMemory(start, size);
     }
     if (protect) {
-      if (!CheckMemoryRangeAvailability(start, size))
+      if (dry_run && !CheckMemoryRangeAvailability(start, size, !dry_run))
         return false;
-      if (!ProtectMemoryRange(start, size, kMemoryLayout[i].name))
+      if (!dry_run && !ProtectMemoryRange(start, size, kMemoryLayout[i].name))
         return false;
     }
   }
@@ -1216,6 +1227,35 @@ bool InitShadow(bool init_origins) {
   return true;
 }
 
+bool InitShadowWithReExec(bool init_origins) {
+  // Start with dry run: check layout is ok, but don't print warnings because
+  // warning messages will cause tests to fail (even if we successfully re-exec
+  // after the warning).
+  bool success = InitShadow(init_origins, true);
+  if (!success) {
+#if SANITIZER_LINUX
+    // Perhaps ASLR entropy is too high. If ASLR is enabled, re-exec without it.
+    int old_personality = personality(0xffffffff);
+    bool aslr_on =
+        (old_personality != -1) && ((old_personality & ADDR_NO_RANDOMIZE) == 0);
+
+    if (aslr_on) {
+      VReport(1,
+              "WARNING: DataflowSanitizer: memory layout is incompatible, "
+              "possibly due to high-entropy ASLR.\n"
+              "Re-execing with fixed virtual address space.\n"
+              "N.B. reducing ASLR entropy is preferable.\n");
+      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
+      ReExec();
+    }
+#endif
+  }
+
+  // The earlier dry run didn't actually map or protect anything. Run again in
+  // non-dry run mode.
+  return success && InitShadow(init_origins, false);
+}
+
 static void DFsanInit(int argc, char **argv, char **envp) {
   CHECK(!dfsan_init_is_running);
   if (dfsan_inited)
@@ -1229,7 +1269,7 @@ static void DFsanInit(int argc, char **argv, char **envp) {
 
   CheckASLR();
 
-  InitShadow(dfsan_get_track_origins());
+  InitShadowWithReExec(dfsan_get_track_origins());
 
   initialize_interceptors();
 
diff --git a/compiler-rt/lib/dfsan/dfsan_allocator.cpp b/compiler-rt/lib/dfsan/dfsan_allocator.cpp
index df8be2cf5ae04c..63475f434cd100 100644
--- a/compiler-rt/lib/dfsan/dfsan_allocator.cpp
+++ b/compiler-rt/lib/dfsan/dfsan_allocator.cpp
@@ -37,6 +37,9 @@ struct DFsanMapUnmapCallback {
   void OnUnmap(uptr p, uptr size) const { dfsan_set_label(0, (void *)p, size); }
 };
 
+// Note: to ensure that the allocator is compatible with the application memory
+// layout (especially with high-entropy ASLR), kSpaceBeg and kSpaceSize must be
+// duplicated as MappingDesc::ALLOCATOR in dfsan_platform.h.
 #if defined(__aarch64__)
 const uptr kAllocatorSpace = 0xE00000000000ULL;
 #else
diff --git a/compiler-rt/lib/dfsan/dfsan_platform.h b/compiler-rt/lib/dfsan/dfsan_platform.h
index b849b4b528ad29..01f0de47d960d3 100644
--- a/compiler-rt/lib/dfsan/dfsan_platform.h
+++ b/compiler-rt/lib/dfsan/dfsan_platform.h
@@ -27,10 +27,19 @@ using __sanitizer::uptr;
 struct MappingDesc {
   uptr start;
   uptr end;
-  enum Type { INVALID, APP, SHADOW, ORIGIN } type;
+  enum Type {
+    INVALID = 1,
+    ALLOCATOR = 2,
+    APP = 4,
+    SHADOW = 8,
+    ORIGIN = 16,
+  } type;
   const char *name;
 };
 
+// Note: MappingDesc::ALLOCATOR entries are only used to check for memory
+// layout compatibility. The actual allocation settings are in
+// dfsan_allocator.cpp, which need to be kept in sync.
 #if SANITIZER_LINUX && SANITIZER_WORDSIZE == 64
 
 #  if defined(__aarch64__)
@@ -53,7 +62,8 @@ const MappingDesc kMemoryLayout[] = {
     {0X0B00000000000, 0X0C00000000000, MappingDesc::SHADOW, "shadow-10-13"},
     {0X0C00000000000, 0X0D00000000000, MappingDesc::INVALID, "invalid"},
     {0X0D00000000000, 0X0E00000000000, MappingDesc::ORIGIN, "origin-10-13"},
-    {0X0E00000000000, 0X1000000000000, MappingDesc::APP, "app-15"},
+    {0X0E00000000000, 0X0E40000000000, MappingDesc::ALLOCATOR, "allocator"},
+    {0X0E40000000000, 0X1000000000000, MappingDesc::APP, "app-15"},
 };
 #    define MEM_TO_SHADOW(mem) ((uptr)mem ^ 0xB00000000000ULL)
 #    define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x200000000000ULL)
@@ -76,7 +86,8 @@ const MappingDesc kMemoryLayout[] = {
     {0x510000000000ULL, 0x600000000000ULL, MappingDesc::APP, "app-2"},
     {0x600000000000ULL, 0x610000000000ULL, MappingDesc::ORIGIN, "origin-1"},
     {0x610000000000ULL, 0x700000000000ULL, MappingDesc::INVALID, "invalid"},
-    {0x700000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
+    {0x700000000000ULL, 0x740000000000ULL, MappingDesc::ALLOCATOR, "allocator"},
+    {0x740000000000ULL, 0x800000000000ULL, MappingDesc::APP, "app-3"}};
 #    define MEM_TO_SHADOW(mem) (((uptr)(mem)) ^ 0x500000000000ULL)
 #    define SHADOW_TO_ORIGIN(mem) (((uptr)(mem)) + 0x100000000000ULL)
 #  endif
@@ -93,20 +104,21 @@ const uptr kMemoryLayoutSize = sizeof(kMemoryLayout) / sizeof(kMemoryLayout[0]);
 __attribute__((optimize("unroll-loops")))
 #endif
 inline bool
-addr_is_type(uptr addr, MappingDesc::Type mapping_type) {
+addr_is_type(uptr addr, int mapping_types) {
 // It is critical for performance that this loop is unrolled (because then it is
 // simplified into just a few constant comparisons).
 #ifdef __clang__
 #  pragma unroll
 #endif
   for (unsigned i = 0; i < kMemoryLayoutSize; ++i)
-    if (kMemoryLayout[i].type == mapping_type &&
+    if ((kMemoryLayout[i].type & mapping_types) &&
         addr >= kMemoryLayout[i].start && addr < kMemoryLayout[i].end)
       return true;
   return false;
 }
 
-#define MEM_IS_APP(mem) addr_is_type((uptr)(mem), MappingDesc::APP)
+#define MEM_IS_APP(mem) \
+  (addr_is_type((uptr)(mem), MappingDesc::APP | MappingDesc::ALLOCATOR))
 #define MEM_IS_SHADOW(mem) addr_is_type((uptr)(mem), MappingDesc::SHADOW)
 #define MEM_IS_ORIGIN(mem) addr_is_type((uptr)(mem), MappingDesc::ORIGIN)
 

>From f1ecb0c2ea347cc79b9f4b869f65c57a9a0b2df3 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Mon, 18 Mar 2024 23:31:01 +0000
Subject: [PATCH 2/2] Abort if InitShadowWithReExec fails, instead of silently
 failing (likely leading to a segfault down the line)

---
 compiler-rt/lib/dfsan/dfsan.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp
index 7d435fb140ea97..302e3c3032ac59 100644
--- a/compiler-rt/lib/dfsan/dfsan.cpp
+++ b/compiler-rt/lib/dfsan/dfsan.cpp
@@ -1269,7 +1269,11 @@ static void DFsanInit(int argc, char **argv, char **envp) {
 
   CheckASLR();
 
-  InitShadowWithReExec(dfsan_get_track_origins());
+  if (!InitShadowWithReExec(dfsan_get_track_origins())) {
+    Printf("FATAL: DataflowSanitizer can not mmap the shadow memory.\n");
+    DumpProcessMap();
+    Die();
+  }
 
   initialize_interceptors();
 



More information about the llvm-commits mailing list