[compiler-rt] r176818 - [msan] intercept dlopen and clear shadow for it

Reid Kleckner reid at kleckner.net
Mon Mar 11 11:07:42 PDT 2013


Author: rnk
Date: Mon Mar 11 13:07:42 2013
New Revision: 176818

URL: http://llvm.org/viewvc/llvm-project?rev=176818&view=rev
Log:
[msan] intercept dlopen and clear shadow for it

Summary:
The loader does not call mmap() through the PLT because it has to
bootstrap the process before libc is present.  Hooking dlopen() isn't
enough either because the loader runs module initializers before
returning, and they could run arbitrary msan instrumented code.

If msandr is present, then we can intercept the mmaps from dlopen at the
syscall layer and clear the shadow there.  If msandr is missing, we
clear the shadow after dlopen() and hope any initializers are trivial.

Reviewers: eugenis

CC: kcc, llvm-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D509

Added:
    compiler-rt/trunk/lib/msan/tests/msan_loadable.cc   (with props)
Modified:
    compiler-rt/trunk/lib/msan/msan.cc
    compiler-rt/trunk/lib/msan/msan.h
    compiler-rt/trunk/lib/msan/msan_interceptors.cc
    compiler-rt/trunk/lib/msan/msan_interface_internal.h
    compiler-rt/trunk/lib/msan/msan_linux.cc
    compiler-rt/trunk/lib/msan/tests/CMakeLists.txt
    compiler-rt/trunk/lib/msan/tests/msan_test.cc
    compiler-rt/trunk/lib/msandr/msandr.cc

Modified: compiler-rt/trunk/lib/msan/msan.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/msan.cc?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/msan.cc (original)
+++ compiler-rt/trunk/lib/msan/msan.cc Mon Mar 11 13:07:42 2013
@@ -59,6 +59,7 @@ static THREADLOCAL struct {
 } __msan_stack_bounds;
 
 static THREADLOCAL bool is_in_symbolizer;
+static THREADLOCAL bool is_in_loader;
 
 extern "C" const int __msan_track_origins;
 int __msan_get_track_origins() {
@@ -87,6 +88,14 @@ void EnterSymbolizer() { is_in_symbolize
 void ExitSymbolizer()  { is_in_symbolizer = false; }
 bool IsInSymbolizer() { return is_in_symbolizer; }
 
+void EnterLoader() { is_in_loader = true; }
+void ExitLoader()  { is_in_loader = false; }
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader() { return is_in_loader; }
+}
+
 static Flags msan_flags;
 
 Flags *flags() {

Modified: compiler-rt/trunk/lib/msan/msan.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/msan.h?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/msan.h (original)
+++ compiler-rt/trunk/lib/msan/msan.h Mon Mar 11 13:07:42 2013
@@ -26,6 +26,8 @@
 #define MEM_IS_SHADOW(mem) ((uptr)mem >=         0x200000000000ULL && \
                             (uptr)mem <=         0x400000000000ULL)
 
+struct link_map;  // Opaque type returned by dlopen().
+
 const int kMsanParamTlsSizeInWords = 100;
 const int kMsanRetvalTlsSizeInWords = 100;
 
@@ -55,6 +57,9 @@ struct SymbolizerScope {
   ~SymbolizerScope() { ExitSymbolizer(); }
 };
 
+void EnterLoader();
+void ExitLoader();
+
 void MsanDie();
 void PrintWarning(uptr pc, uptr bp);
 void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin);
@@ -66,6 +71,8 @@ void ReportUMR(StackTrace *stack, u32 or
 void ReportExpectedUMRNotFound(StackTrace *stack);
 void ReportAtExitStatistics();
 
+void UnpoisonMappedDSO(struct link_map *map);
+
 #define GET_MALLOC_STACK_TRACE                                     \
   StackTrace stack;                                                \
   stack.size = 0;                                                  \

Modified: compiler-rt/trunk/lib/msan/msan_interceptors.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/msan_interceptors.cc?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/msan_interceptors.cc (original)
+++ compiler-rt/trunk/lib/msan/msan_interceptors.cc Mon Mar 11 13:07:42 2013
@@ -762,6 +762,25 @@ INTERCEPTOR(int, dladdr, void *addr, dli
   return res;
 }
 
+// dlopen() ultimately calls mmap() down inside the loader, which generally
+// doesn't participate in dynamic symbol resolution.  Therefore we won't
+// intercept its calls to mmap, and we have to hook it here.  The loader
+// initializes the module before returning, so without the dynamic component, we
+// won't be able to clear the shadow before the initializers.  Fixing this would
+// require putting our own initializer first to clear the shadow.
+INTERCEPTOR(void *, dlopen, const char *filename, int flag) {
+  ENSURE_MSAN_INITED();
+  EnterLoader();
+  link_map *map = (link_map *)REAL(dlopen)(filename, flag);
+  ExitLoader();
+  if (map && !__msan_has_dynamic_component()) {
+    // dlopen() returns NULL on failure, so there is nothing to unpoison then.
+    // Otherwise msandr didn't clear the shadow for us; do it ourselves now.
+    UnpoisonMappedDSO(map);
+  }
+  return (void *)map;
+}
+
 INTERCEPTOR(int, getrusage, int who, void *usage) {
   ENSURE_MSAN_INITED();
   int res = REAL(getrusage)(who, usage);
@@ -973,6 +992,7 @@ void InitializeInterceptors() {
   INTERCEPT_FUNCTION(recvfrom);
   INTERCEPT_FUNCTION(recvmsg);
   INTERCEPT_FUNCTION(dladdr);
+  INTERCEPT_FUNCTION(dlopen);
   INTERCEPT_FUNCTION(getrusage);
   inited = 1;
 }

Modified: compiler-rt/trunk/lib/msan/msan_interface_internal.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/msan_interface_internal.h?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/msan_interface_internal.h (original)
+++ compiler-rt/trunk/lib/msan/msan_interface_internal.h Mon Mar 11 13:07:42 2013
@@ -104,6 +104,10 @@ int __msan_get_retval_tls_offset();
 SANITIZER_INTERFACE_ATTRIBUTE
 int __msan_get_param_tls_offset();
 
+// For intercepting mmap from ld.so in msandr.
+SANITIZER_INTERFACE_ATTRIBUTE
+bool __msan_is_in_loader();
+
 // For testing.
 SANITIZER_INTERFACE_ATTRIBUTE
 u32 __msan_get_umr_origin();

Modified: compiler-rt/trunk/lib/msan/msan_linux.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/msan_linux.cc?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/msan_linux.cc (original)
+++ compiler-rt/trunk/lib/msan/msan_linux.cc Mon Mar 11 13:07:42 2013
@@ -16,6 +16,9 @@
 
 #include "msan.h"
 
+#include <algorithm>
+#include <elf.h>
+#include <link.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
@@ -87,6 +90,42 @@ static void MsanAtExit(void) {
 void InstallAtExitHandler() {
   atexit(MsanAtExit);
 }
+
+void UnpoisonMappedDSO(link_map *map) {
+  typedef ElfW(Phdr) Elf_Phdr;
+  typedef ElfW(Ehdr) Elf_Ehdr;
+  char *base = (char *)map->l_addr;
+  Elf_Ehdr *ehdr = (Elf_Ehdr *)base;
+  char *phdrs = base + ehdr->e_phoff;
+  char *phdrs_end = phdrs + ehdr->e_phnum * ehdr->e_phentsize;
+
+  // Find the segment with the minimum base so we can "relocate" the p_vaddr
+  // fields.  Typically ET_DYN objects (DSOs) have base of zero and ET_EXEC
+  // objects have a non-zero base.
+  uptr preferred_base = ~0ULL;
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type == PT_LOAD)
+      preferred_base = std::min(preferred_base, (uptr)phdr->p_vaddr);
+  }
+
+  // Compute the delta from the real base to get a relocation delta.
+  ptrdiff_t delta = (uptr)base - preferred_base;
+  // Now we can figure out what the loader really mapped.
+  for (char *iter = phdrs; iter != phdrs_end; iter += ehdr->e_phentsize) {
+    Elf_Phdr *phdr = (Elf_Phdr *)iter;
+    if (phdr->p_type == PT_LOAD) {
+      uptr seg_start = phdr->p_vaddr + delta;
+      uptr seg_end = seg_start + phdr->p_memsz;
+      // None of these values are aligned.  We consider the ragged edges of the
+      // load command as defined, since they are mapped from the file.
+      seg_start = RoundDownTo(seg_start, GetPageSizeCached());
+      seg_end = RoundUpTo(seg_end, GetPageSizeCached());
+      __msan_unpoison((void *)seg_start, seg_end - seg_start);
+    }
+  }
+}
+
 }  // namespace __msan
 
 #endif  // __linux__

Modified: compiler-rt/trunk/lib/msan/tests/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/tests/CMakeLists.txt?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/tests/CMakeLists.txt (original)
+++ compiler-rt/trunk/lib/msan/tests/CMakeLists.txt Mon Mar 11 13:07:42 2013
@@ -32,6 +32,7 @@ set(MSAN_LIBCXX_LINK_FLAGS
 
 # Unittest sources and build flags.
 set(MSAN_UNITTEST_SOURCE msan_test.cc)
+set(MSAN_LOADABLE_SOURCE msan_loadable.cc)
 set(MSAN_UNITTEST_HEADERS
   msandr_test_so.h
   ../../../include/sanitizer/msan_interface.h
@@ -65,6 +66,10 @@ set(MSAN_UNITTEST_LINK_FLAGS
   # FIXME: we build libcxx without cxxabi and need libstdc++ to provide it.
   -lstdc++
 )
+set(MSAN_LOADABLE_LINK_FLAGS
+  -fsanitize=memory
+  -shared
+)
 
 # Compile source for the given architecture, using compiler
 # options in ${ARGN}, and add it to the object list.
@@ -96,6 +101,7 @@ macro(add_msan_test test_suite test_name
   add_compiler_rt_test(${test_suite} ${test_name}
                        OBJECTS ${ARGN}
                        DEPS ${MSAN_RUNTIME_LIBRARIES} ${ARGN}
+                            ${MSAN_LOADABLE_SO}
                        LINK_FLAGS ${MSAN_UNITTEST_LINK_FLAGS}
                                   ${TARGET_LINK_FLAGS}
                                   "-Wl,-rpath=${CMAKE_CURRENT_BINARY_DIR}")
@@ -130,11 +136,22 @@ macro(add_msan_tests_for_arch arch)
   msan_compile(MSAN_INST_TEST_OBJECTS ${MSAN_UNITTEST_SOURCE} ${arch}
                ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
 
+  # Instrumented loadable module objects.
+  set(MSAN_INST_LOADABLE_OBJECTS)
+  msan_compile(MSAN_INST_LOADABLE_OBJECTS ${MSAN_LOADABLE_SOURCE} ${arch}
+               ${MSAN_UNITTEST_INSTRUMENTED_CFLAGS})
+
   # Uninstrumented shared object for MSanDR tests.
   set(MSANDR_TEST_OBJECTS)
   msan_compile(MSANDR_TEST_OBJECTS ${MSANDR_UNITTEST_SOURCE} ${arch}
                ${MSAN_UNITTEST_COMMON_CFLAGS})
 
+  # Instrumented loadable library tests.
+  set(MSAN_LOADABLE_SO)
+  msan_link_shared(MSAN_LOADABLE_SO "libmsan_loadable" ${arch}
+                   OBJECTS ${MSAN_INST_LOADABLE_OBJECTS}
+                   DEPS ${MSAN_INST_LOADABLE_OBJECTS} ${MSAN_RUNTIME_LIBRARIES})
+
   # Uninstrumented shared library tests.
   set(MSANDR_TEST_SO)
   msan_link_shared(MSANDR_TEST_SO "libmsandr_test" ${arch}

Added: compiler-rt/trunk/lib/msan/tests/msan_loadable.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/tests/msan_loadable.cc?rev=176818&view=auto
==============================================================================
--- compiler-rt/trunk/lib/msan/tests/msan_loadable.cc (added)
+++ compiler-rt/trunk/lib/msan/tests/msan_loadable.cc Mon Mar 11 13:07:42 2013
@@ -0,0 +1,45 @@
+//===-- msan_loadable.cc --------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// MemorySanitizer unit tests.
+//===----------------------------------------------------------------------===//
+
+#include "msan/msan_interface_internal.h"
+#include <stdlib.h>
+
+static void *dso_global;
+
+// No name mangling.
+extern "C" {
+
+__attribute__((constructor))
+void loadable_module_init(void) {
+  if (!__msan_has_dynamic_component())
+    return;
+  // The real test is that this comparison must not report an uninit read.
+  if (dso_global == NULL)
+    dso_global = malloc(4);
+}
+
+__attribute__((destructor))
+void loadable_module_fini(void) {
+  if (!__msan_has_dynamic_component())
+    return;
+  free(dso_global);
+  // *Don't* overwrite it with NULL!  That would unpoison it, but our test
+  // relies on reloading at the same address and keeping the poison.
+}
+
+void **get_dso_global() {
+  return &dso_global;
+}
+
+}

Propchange: compiler-rt/trunk/lib/msan/tests/msan_loadable.cc
------------------------------------------------------------------------------
    svn:eol-style = LF

Modified: compiler-rt/trunk/lib/msan/tests/msan_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msan/tests/msan_test.cc?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msan/tests/msan_test.cc (original)
+++ compiler-rt/trunk/lib/msan/tests/msan_test.cc Mon Mar 11 13:07:42 2013
@@ -1288,6 +1288,55 @@ TEST(MemorySanitizer, dladdr) {
   EXPECT_NOT_POISONED((unsigned long)info.dli_saddr);
 }
 
+#ifdef __GLIBC__
+extern "C" {
+  extern void *__libc_stack_end;
+}
+
+static char **GetArgv(void) {
+  uintptr_t *stack_end = (uintptr_t *)__libc_stack_end;
+  return (char**)(stack_end + 1);
+}
+
+#else  // __GLIBC__
+# error "TODO: port this"
+#endif
+
+TEST(MemorySanitizer, dlopen) {
+  // Compute the path to our loadable DSO.  We assume it's in the same
+  // directory.  Only use string routines that we intercept so far to do this.
+  char **argv = GetArgv();
+  const char *basename = "libmsan_loadable.x86_64.so";
+  size_t path_max = strlen(argv[0]) + 1 + strlen(basename) + 1;
+  char *path = new char[path_max];
+  char *last_slash = strrchr(argv[0], '/');
+  assert(last_slash);
+  snprintf(path, path_max, "%.*s/%s", int(last_slash - argv[0]),
+           argv[0], basename);
+
+  // We need to clear shadow for globals when doing dlopen.  In order to test
+  // this, we have to poison the shadow for the DSO before we load it.  In
+  // general this is difficult, but the loader tends to reload things in the
+  // same place, so we open, close, and then reopen.  The global should always
+  // start out clean after dlopen.
+  for (int i = 0; i < 2; i++) {
+    void *lib = dlopen(path, RTLD_LAZY);
+    if (lib == NULL) {
+      printf("dlerror: %s\n", dlerror());
+      assert(lib != NULL);
+    }
+    void **(*get_dso_global)() = (void **(*)())dlsym(lib, "get_dso_global");
+    assert(get_dso_global);
+    void **dso_global = get_dso_global();
+    EXPECT_NOT_POISONED(*dso_global);
+    __msan_poison(dso_global, sizeof(*dso_global));
+    EXPECT_POISONED(*dso_global);
+    dlclose(lib);
+  }
+
+  delete[] path;
+}
+
 TEST(MemorySanitizer, scanf) {
   const char *input = "42 hello";
   int* d = new int;

Modified: compiler-rt/trunk/lib/msandr/msandr.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msandr/msandr.cc?rev=176818&r1=176817&r2=176818&view=diff
==============================================================================
--- compiler-rt/trunk/lib/msandr/msandr.cc (original)
+++ compiler-rt/trunk/lib/msandr/msandr.cc Mon Mar 11 13:07:42 2013
@@ -37,6 +37,7 @@
 #include <drsyscall.h>
 
 #include <sys/mman.h>
+#include <sys/syscall.h>  /* for SYS_mmap */
 
 #include <algorithm>
 #include <string>
@@ -103,6 +104,17 @@ ModuleData::ModuleData(const module_data
 
 int(*__msan_get_retval_tls_offset)();
 int(*__msan_get_param_tls_offset)();
+void (*__msan_unpoison)(void *base, size_t size);
+bool (*__msan_is_in_loader)();
+
+static generic_func_t LookupCallback(module_data_t *app, const char *name) {
+  generic_func_t callback = dr_get_proc_address(app->handle, name);
+  if (callback == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", name, app->full_path);
+    CHECK(callback);
+  }
+  return callback;
+}
 
 void InitializeMSanCallbacks() {
   module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
@@ -113,25 +125,18 @@ void InitializeMSanCallbacks() {
   }
   g_app_path = app->full_path;
 
-  const char *callback_name = "__msan_get_retval_tls_offset";
-  __msan_get_retval_tls_offset =
-      (int(*)()) dr_get_proc_address(app->handle, callback_name);
-  if (__msan_get_retval_tls_offset == NULL) {
-    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
-    CHECK(__msan_get_retval_tls_offset);
-  }
+  __msan_get_retval_tls_offset = (int (*)())
+      LookupCallback(app, "__msan_get_retval_tls_offset");
+  __msan_get_param_tls_offset = (int (*)())
+      LookupCallback(app, "__msan_get_param_tls_offset");
+  __msan_unpoison = (void(*)(void *, size_t))
+      LookupCallback(app, "__msan_unpoison");
+  __msan_is_in_loader = (bool (*)())
+      LookupCallback(app, "__msan_is_in_loader");
 
-  callback_name = "__msan_get_param_tls_offset";
-  __msan_get_param_tls_offset =
-      (int(*)()) dr_get_proc_address(app->handle, callback_name);
-  if (__msan_get_param_tls_offset == NULL) {
-    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
-    CHECK(__msan_get_param_tls_offset);
-  }
+  dr_free_module_data(app);
 }
 
-#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
-
 // FIXME: Handle absolute addresses and PC-relative addresses.
 // FIXME: Handle TLS accesses via FS or GS.  DR assumes all other segments have
 // a zero base anyway.
@@ -520,7 +525,7 @@ bool drsys_iter_memarg_cb(drsys_arg_t *a
 
   if (arg->pre)
     return true;
-  if (arg->mode != DRSYS_PARAM_OUT)
+  if (!TESTANY(DRSYS_PARAM_OUT, arg->mode))
     return true;
 
   size_t sz = arg->size;
@@ -538,8 +543,19 @@ bool drsys_iter_memarg_cb(drsys_arg_t *a
               (unsigned long long)(sz & 0xFFFFFFFF));
   }
 
-  void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
-  memset(p, 0, sz);
+  if (VERBOSITY > 0) {
+    drmf_status_t res;
+    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+    const char *name;
+    res = drsys_syscall_name(syscall, &name);
+    dr_printf("drsyscall: syscall '%s' arg %d wrote range [%p, %p)\n",
+              name, arg->ordinal, arg->start_addr,
+              (char *)arg->start_addr + sz);
+  }
+
+  // We don't switch to the app context because __msan_unpoison() doesn't need
+  // TLS segments.
+  __msan_unpoison(arg->start_addr, sz);
 
   return true; /* keep going */
 }
@@ -576,6 +592,19 @@ bool event_pre_syscall(void *drcontext,
   return true;
 }
 
+static bool IsInLoader(void *drcontext) {
+  // TODO: This segment swap is inefficient.  DR should just let us query the
+  // app segment base, which it has.  Alternatively, if we disable
+  // -mangle_app_seg, then we won't need the swap.
+  bool need_swap = !dr_using_app_state(drcontext);
+  if (need_swap)
+    dr_switch_to_app_state(drcontext);
+  bool is_in_loader = __msan_is_in_loader();
+  if (need_swap)
+    dr_switch_to_dr_state(drcontext);
+  return is_in_loader;
+}
+
 void event_post_syscall(void *drcontext, int sysnum) {
   drsys_syscall_t *syscall;
   drsys_sysnum_t sysnum_full;
@@ -598,6 +627,30 @@ void event_post_syscall(void *drcontext,
         drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
     CHECK(res == DRMF_SUCCESS);
   }
+
+  // Our normal mmap interceptor can't intercept calls from the loader itself.
+  // This means we don't clear the shadow for calls to dlopen.  For now, we
+  // solve this by intercepting mmap from ld.so here, but ideally we'd have a
+  // solution that doesn't rely on msandr.
+  //
+  // Be careful not to intercept maps done by the msan rtl.  Otherwise we end up
+  // unpoisoning vast regions of memory and OOMing.
+  // TODO: __msan_unpoison() could "flush" large regions of memory like tsan
+  // does instead of doing a large memset.  However, we need the memory to be
+  // zeroed, whereas tsan does not, so plain madvise is not enough.
+  if (success && (sysnum == SYS_mmap IF_NOT_X64(|| sysnum == SYS_mmap2))) {
+    if (IsInLoader(drcontext)) {
+      app_pc base = (app_pc)dr_syscall_get_result(drcontext);
+      ptr_uint_t size;
+      drmf_status_t res = drsys_pre_syscall_arg(drcontext, 1, &size);
+      CHECK(res == DRMF_SUCCESS);
+      if (VERBOSITY > 0)
+        dr_printf("unpoisoning for dlopen: [%p-%p]\n", base, base + size);
+      // We don't switch to the app context because __msan_unpoison() doesn't
+      // need TLS segments.
+      __msan_unpoison(base, size);
+    }
+  }
 }
 
 } // namespace





More information about the llvm-commits mailing list