[compiler-rt] r175883 - [msan] MSanDR: initial commit.

Evgeniy Stepanov eugeni.stepanov at gmail.com
Fri Feb 22 01:34:19 PST 2013


Author: eugenis
Date: Fri Feb 22 03:34:19 2013
New Revision: 175883

URL: http://llvm.org/viewvc/llvm-project?rev=175883&view=rev
Log:
[msan] MSanDR: initial commit.

MSanDR is a DynamoRio-based tool that handles uninstrumented libraries and
dynamically generated code for MSan.

Added:
    compiler-rt/trunk/lib/msandr/
    compiler-rt/trunk/lib/msandr/CMakeLists.txt   (with props)
    compiler-rt/trunk/lib/msandr/README.txt   (with props)
    compiler-rt/trunk/lib/msandr/msandr.cc   (with props)
Modified:
    compiler-rt/trunk/lib/CMakeLists.txt

Modified: compiler-rt/trunk/lib/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/CMakeLists.txt?rev=175883&r1=175882&r2=175883&view=diff
==============================================================================
--- compiler-rt/trunk/lib/CMakeLists.txt (original)
+++ compiler-rt/trunk/lib/CMakeLists.txt Fri Feb 22 03:34:19 2013
@@ -14,6 +14,7 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linu
   # ThreadSanitizer and MemorySanitizer are supported on Linux only.
   add_subdirectory(tsan)
   add_subdirectory(msan)
+  add_subdirectory(msandr)
 endif()
 
 # FIXME: Add support for the profile library.

Added: compiler-rt/trunk/lib/msandr/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msandr/CMakeLists.txt?rev=175883&view=auto
==============================================================================
--- compiler-rt/trunk/lib/msandr/CMakeLists.txt (added)
+++ compiler-rt/trunk/lib/msandr/CMakeLists.txt Fri Feb 22 03:34:19 2013
@@ -0,0 +1,26 @@
+
+if(DynamoRIO_DIR AND DrMemoryFramework_DIR)
+  # MSanDR is only configured when the user points CMake at both a DynamoRIO
+  # and a DrMemoryFramework package directory (see README.txt next to this
+  # file).
+
+  # NOTE(review): presumably the DynamoRIO CMake helpers consult this variable
+  # to select GCC-style flags when the compiler is Clang -- confirm before
+  # removing.
+  set(CMAKE_COMPILER_IS_GNUCC 1)
+
+  # Both packages were requested explicitly, so fail at the lookup with a clear
+  # message rather than later at the first use of a command they provide.
+  find_package(DynamoRIO REQUIRED)
+  find_package(DrMemoryFramework REQUIRED)
+
+  set(arch "x86_64")
+  add_library(clang_rt.msandr-${arch} SHARED msandr.cc)
+  configure_DynamoRIO_client(clang_rt.msandr-${arch})
+
+  # append_target_cflags(<tgt> <cflags>)
+  #
+  # Appends the string <cflags> to the COMPILE_FLAGS property of target <tgt>,
+  # keeping whatever flags were already set on it.
+  function(append_target_cflags tgt cflags)
+    get_property(old_cflags TARGET ${tgt} PROPERTY COMPILE_FLAGS)
+    set_property(TARGET ${tgt} PROPERTY COMPILE_FLAGS "${old_cflags} ${cflags}")
+  endfunction()
+
+  append_target_cflags(clang_rt.msandr-${arch} "-Wno-c++11-extensions")
+
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drutil)
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drmgr)
+  use_DynamoRIO_extension(clang_rt.msandr-${arch} drsyscall)
+
+  set_target_properties(clang_rt.msandr-${arch} PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
+  install(TARGETS clang_rt.msandr-${arch}
+    LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
+endif()

Propchange: compiler-rt/trunk/lib/msandr/CMakeLists.txt
------------------------------------------------------------------------------
    svn:eol-style = LF

Added: compiler-rt/trunk/lib/msandr/README.txt
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msandr/README.txt?rev=175883&view=auto
==============================================================================
--- compiler-rt/trunk/lib/msandr/README.txt (added)
+++ compiler-rt/trunk/lib/msandr/README.txt Fri Feb 22 03:34:19 2013
@@ -0,0 +1,33 @@
+Experimental DynamoRIO-MSAN plugin (codename "MSanDR").
+Supports Linux/x86_64 only.
+
+Building:
+  1. First, download and build DynamoRIO:
+     (svn co https://dynamorio.googlecode.com/svn/trunk dr && \
+      cd dr && mkdir build && cd build && \
+      cmake -DDR_EXT_DRMGR_STATIC=ON -DDR_EXT_DRSYMS_STATIC=ON \
+            -DDR_EXT_DRUTIL_STATIC=ON -DDR_EXT_DRWRAP_STATIC=ON .. && \
+      make -j10 && make install)
+
+  2. Download and build DrMemory (for DrSyscall extension)
+     (svn co http://drmemory.googlecode.com/svn/trunk/ drmemory && \
+      cd drmemory && mkdir build && cd build && \
+      cmake -DDynamoRIO_DIR=`pwd`/../../dr/exports/cmake .. && \
+      make -j10 && make install)
+
+  NOTE: The command above builds a shared DrSyscall library in a non-standard
+  location, so LD_LIBRARY_PATH must be set accordingly when running MSanDR.
+  To build a static DrSyscall library (and link it into MSanDR), add
+  -DDR_EXT_DRSYSCALL_STATIC=ON to the CMake invocation above, but
+  beware: DrSyscall is LGPL.
+
+  3. Now, build LLVM with two extra CMake flags:
+       -DDynamoRIO_DIR=<path_to_dynamorio>/exports/cmake
+       -DDrMemoryFramework_DIR=<path_to_drmemory>/exports64/drmf
+
+  This will build a lib/clang/$VERSION/lib/linux/libclang_rt.msandr-x86_64.so
+
+Running:
+  <path_to_dynamorio>/exports/bin64/drrun -c lib/clang/$VERSION/lib/linux/libclang_rt.msandr-x86_64.so -- test_binary
+
+MSan unit tests contain several tests for MSanDR (use MemorySanitizerDr.* gtest filter).

Propchange: compiler-rt/trunk/lib/msandr/README.txt
------------------------------------------------------------------------------
    svn:eol-style = LF

Added: compiler-rt/trunk/lib/msandr/msandr.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/msandr/msandr.cc?rev=175883&view=auto
==============================================================================
--- compiler-rt/trunk/lib/msandr/msandr.cc (added)
+++ compiler-rt/trunk/lib/msandr/msandr.cc Fri Feb 22 03:34:19 2013
@@ -0,0 +1,670 @@
+//===-- msandr.cc ---------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemorySanitizer.
+//
+// DynamoRIO client for MemorySanitizer.
+//
+// MemorySanitizer requires that all program code is instrumented. Any memory
+// store that can turn an uninitialized value into an initialized value must be
+// observed by the tool, otherwise we risk reporting a false UMR.
+//
+// This also includes any libraries that the program depends on.
+//
+// In the case when rebuilding all program dependencies with MemorySanitizer is
+// problematic, an experimental MSanDR tool (the code you are currently looking
+// at) can be used. It is a DynamoRIO-based tool that uses dynamic
+// instrumentation to:
+// * Unpoison all memory stores.
+// * Unpoison TLS slots used by MemorySanitizer to pass function arguments and
+//   return value shadow on anything that looks like a function call or a return
+//   from a function.
+//
+// This tool does not detect the use of uninitialized values in uninstrumented
+// libraries. It merely gets rid of false positives by marking all data that
+// passes through uninstrumented code as fully initialized.
+//===----------------------------------------------------------------------===//
+
+#include <dr_api.h>
+#include <drutil.h>
+#include <drmgr.h>
+#include <drsyscall.h>
+
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <string>
+#include <set>
+#include <vector>
+#include <string.h>
+
+using std::string;
+
+#define TESTALL(mask, var) (((mask) & (var)) == (mask))
+#define TESTANY(mask, var) (((mask) & (var)) != 0)
+
+#define CHECK_IMPL(condition, file, line)                                      \
+  do {                                                                         \
+    if (!(condition)) {                                                        \
+      dr_printf("Check failed: `%s`\nat %s:%d\n", #condition, file, line);     \
+      dr_abort();                                                              \
+    }                                                                          \
+  } while (0) // TODO: stacktrace
+
+#define CHECK(condition) CHECK_IMPL(condition, __FILE__, __LINE__)
+
+#define VERBOSITY 0
+
+namespace {
+
+// Book-keeping record for one loaded module, kept in g_module_list.
+class ModuleData {
+public:
+  // Builds an empty record: null bounds and handle, empty path, both flags
+  // false.
+  ModuleData();
+  // Builds a record from DynamoRIO's description of a module.
+  ModuleData(const module_data_t *info);
+  // Copying, assignment and destruction deliberately use the
+  // compiler-generated defaults.
+
+  app_pc start_;            // Module bounds: a pc belongs to the module when
+  app_pc end_;              // start_ <= pc < end_.
+  string path_;             // Full path to the module.
+  module_handle_t handle_;  // Handle used for symbol lookups in the module.
+  bool should_instrument_;  // False when the module must be left alone.
+  bool executed_;           // Set the first time a basic block from this
+                            // module is reported (verbose logging only).
+};
+
+string g_app_path;
+
+int msan_retval_tls_offset;
+int msan_param_tls_offset;
+
+// A vector of loaded modules sorted by module bounds.  We look up the current
+// PC in here from the bb event.  This is better than an rb tree because the
+// lookup is faster and the bb event occurs far more often than the module load
+// event.
+std::vector<ModuleData> g_module_list;
+
+// Empty record: no bounds, no path, no handle, nothing to instrument.
+ModuleData::ModuleData()
+    : start_(NULL),
+      end_(NULL),
+      path_(),
+      handle_(NULL),
+      should_instrument_(false),
+      executed_(false) {}
+
+// Record mirroring the bounds, full path and handle reported in `info`.
+// Instrumentation starts out enabled; the black/white lists are consulted
+// later and may switch it off.
+ModuleData::ModuleData(const module_data_t *info)
+    : start_(info->start),
+      end_(info->end),
+      path_(info->full_path),
+      handle_(info->handle),
+      should_instrument_(true),
+      executed_(false) {}
+
+int(*__msan_get_retval_tls_offset)();
+int(*__msan_get_param_tls_offset)();
+
+// Resolves the MSan runtime entry points exported by the application binary.
+//
+// Looks up the application's own module by name, records its full path in
+// g_app_path, and fills in the __msan_get_retval_tls_offset and
+// __msan_get_param_tls_offset function pointers.  Aborts (via CHECK) when the
+// module or either symbol cannot be found.
+void InitializeMSanCallbacks() {
+  module_data_t *app = dr_lookup_module_by_name(dr_get_application_name());
+  if (!app) {
+    dr_printf("%s - oops, dr_lookup_module_by_name failed!\n",
+              dr_get_application_name());
+    CHECK(app);
+  }
+  // std::string makes its own copy, so g_app_path stays valid after `app` is
+  // released below.
+  g_app_path = app->full_path;
+
+  const char *callback_name = "__msan_get_retval_tls_offset";
+  __msan_get_retval_tls_offset =
+      (int(*)()) dr_get_proc_address(app->handle, callback_name);
+  if (__msan_get_retval_tls_offset == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
+    CHECK(__msan_get_retval_tls_offset);
+  }
+
+  callback_name = "__msan_get_param_tls_offset";
+  __msan_get_param_tls_offset =
+      (int(*)()) dr_get_proc_address(app->handle, callback_name);
+  if (__msan_get_param_tls_offset == NULL) {
+    dr_printf("Couldn't find `%s` in %s\n", callback_name, app->full_path);
+    CHECK(__msan_get_param_tls_offset);
+  }
+
+  // dr_lookup_module_by_name() returns an allocated copy of the module
+  // information that the caller owns; release it now that we are done with it.
+  dr_free_module_data(app);
+}
+
+#define MEM_TO_SHADOW(mem) ((mem) & ~0x400000000000ULL)
+
+// Tells whether `opnd` is a memory operand whose shadow we know how to reach:
+// a base+displacement reference that is not FS- or GS-relative.
+// FIXME: Handle absolute addresses and PC-relative addresses.
+// FIXME: Handle TLS accesses via FS or GS.  DR assumes all other segments have
+// a zero base anyway.
+bool OperandIsInteresting(opnd_t opnd) {
+  if (!opnd_is_base_disp(opnd))
+    return false;
+  const reg_id_t segment = opnd_get_segment(opnd);
+  return segment != DR_SEG_FS && segment != DR_SEG_GS;
+}
+
+// Returns true when `instr` writes memory through at least one destination
+// operand accepted by OperandIsInteresting().  Labels and `rep cmps` are
+// always rejected.
+bool WantToInstrument(instr_t *instr) {
+  // TODO: skip push instructions?
+  // FIXME: support the instruction excluded below:
+  // f3 a6    rep cmps %ds:(%rsi) %es:(%rdi) %rsi %rdi %rcx -> %rsi %rdi %rcx
+  if (instr_get_opcode(instr) == OP_rep_cmps)
+    return false;
+
+  // Labels appear due to drutil_expand_rep_string()
+  if (instr_is_label(instr))
+    return false;
+
+  CHECK(instr_ok_to_mangle(instr) == true);
+
+  if (!instr_writes_memory(instr))
+    return false;
+
+  // One interesting destination is enough.
+  const int num_dsts = instr_num_dsts(instr);
+  for (int idx = 0; idx < num_dsts; ++idx) {
+    if (OperandIsInteresting(instr_get_dst(instr, idx)))
+      return true;
+  }
+  return false;
+}
+
+#define PRE(at, what) instrlist_meta_preinsert(bb, at, INSTR_CREATE_##what);
+#define PREF(at, what) instrlist_meta_preinsert(bb, at, what);
+
+// Inserts, in front of `instr`, meta-instructions that zero the MSan shadow of
+// the memory operand `op`.
+//
+//   drcontext  DynamoRIO context for the current thread.
+//   bb         instruction list being instrumented.
+//   instr      application instruction that accesses `op`; new code is placed
+//              immediately before it.
+//   op         base+disp memory operand of `instr` whose shadow is cleared.
+//   is_write   currently unused.
+//
+// Generated sequence: optionally spill the arithmetic flags, pick two scratch
+// registers R1/R2 and spill them, compute the application address in R1, AND
+// it with 0xffffbfffffffffff to obtain the shadow address, store zeroes
+// covering exactly the access size, then restore the registers and flags.
+void InstrumentMops(void *drcontext, instrlist_t *bb, instr_t *instr, opnd_t op,
+                    bool is_write) {
+  bool need_to_restore_eflags = false;
+  uint flags = instr_get_arith_flags(instr);
+  // TODO: do something smarter with flags and spills in general?
+  // For example, spill them only once for a sequence of instrumented
+  // instructions that don't change/read flags.
+
+  // The AND below clobbers the arithmetic flags.  That is harmless only when
+  // `instr` overwrites all six of them without reading any.
+  if (!TESTALL(EFLAGS_WRITE_6, flags) || TESTANY(EFLAGS_READ_6, flags)) {
+    if (VERBOSITY > 1)
+      dr_printf("Spilling eflags...\n");
+    need_to_restore_eflags = true;
+    // TODO: Maybe sometimes don't need to 'seto'.
+    // TODO: Maybe sometimes don't want to spill XAX here?
+    // TODO: No need to spill XAX here if XAX is not used in the BB.
+    // The flags travel through XAX into SPILL_SLOT_3; SPILL_SLOT_1 only holds
+    // the application's XAX temporarily and is free again afterwards.
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+    dr_save_arith_flags_to_xax(drcontext, bb, instr);
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+  }
+
+#if 0
+  dr_printf("==DRMSAN== DEBUG: %d %d %d %d %d %d\n",
+            opnd_is_memory_reference(op), opnd_is_base_disp(op),
+            opnd_is_base_disp(op) ? opnd_get_index(op) : -1,
+            opnd_is_far_memory_reference(op), opnd_is_reg_pointer_sized(op),
+            opnd_is_base_disp(op) ? opnd_get_disp(op) : -1);
+#endif
+
+  reg_id_t R1;
+  bool address_in_R1 = false;
+  if (opnd_is_base_disp(op) && opnd_get_index(op) == DR_REG_NULL &&
+      opnd_get_disp(op) == 0) {
+    // If this is a simple access with no offset or index, we can just use the
+    // base for R1.
+    address_in_R1 = true;
+    R1 = opnd_get_base(op);
+  } else {
+    // Otherwise, we need to compute the addr into R1.
+    // TODO: reuse some spare register? e.g. r15 on x64
+    // TODO: might be used as a non-mem-ref register?
+    R1 = DR_REG_XAX;
+  }
+  CHECK(reg_is_pointer_sized(R1)); // otherwise R2 may be wrong.
+
+  // Pick R2 that's not R1 or used by the operand.  It's OK if the instr uses
+  // R2 elsewhere, since we'll restore it before instr.
+  reg_id_t GPR_TO_USE_FOR_R2[] = {
+    DR_REG_XAX, DR_REG_XBX, DR_REG_XCX, DR_REG_XDX
+    // Don't forget to update the +4 below if you add anything else!
+  };
+  std::set<reg_id_t> unused_registers(GPR_TO_USE_FOR_R2, GPR_TO_USE_FOR_R2 + 4);
+  unused_registers.erase(R1);
+  for (int j = 0; j < opnd_num_regs_used(op); j++) {
+    unused_registers.erase(opnd_get_reg_used(op, j));
+  }
+
+  CHECK(unused_registers.size() > 0);
+  reg_id_t R2 = *unused_registers.begin();
+  CHECK(R1 != R2);
+
+  // Save the current values of R1 and R2.
+  dr_save_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
+  // TODO: Something smarter than spilling a "fixed" register R2?
+  dr_save_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
+
+  if (!address_in_R1)
+    CHECK(drutil_insert_get_mem_addr(drcontext, bb, instr, op, R1, R2));
+  // R1 = shadow address: clear the bit that distinguishes application memory
+  // from shadow memory (same mapping as MEM_TO_SHADOW).
+  PRE(instr, mov_imm(drcontext, opnd_create_reg(R2),
+                     OPND_CREATE_INT64(0xffffbfffffffffff)));
+  PRE(instr, and(drcontext, opnd_create_reg(R1), opnd_create_reg(R2)));
+  // There is no mov_st of a 64-bit immediate, so...
+  opnd_size_t op_size = opnd_get_size(op);
+  CHECK(op_size != OPSZ_NA);
+  uint access_size = opnd_size_in_bytes(op_size);
+  if (access_size <= 4) {
+    PRE(instr,
+        mov_st(drcontext, opnd_create_base_disp(R1, DR_REG_NULL, 0, 0, op_size),
+               opnd_create_immed_int((ptr_int_t) 0, op_size)));
+  } else {
+    // Clear the shadow with 4-byte stores, then finish any remainder with a
+    // 2-byte and/or 1-byte store so that no shadow beyond the accessed bytes
+    // is unpoisoned (e.g. 6- or 10-byte operands).
+    uint ofs = 0;
+    for (; ofs + 4 <= access_size; ofs += 4) {
+      PRE(instr,
+          mov_st(drcontext, OPND_CREATE_MEM32(R1, ofs), OPND_CREATE_INT32(0)));
+    }
+    if (access_size - ofs >= 2) {
+      PRE(instr,
+          mov_st(drcontext,
+                 opnd_create_base_disp(R1, DR_REG_NULL, 0, ofs, OPSZ_2),
+                 opnd_create_immed_int((ptr_int_t) 0, OPSZ_2)));
+      ofs += 2;
+    }
+    if (access_size - ofs >= 1) {
+      PRE(instr,
+          mov_st(drcontext,
+                 opnd_create_base_disp(R1, DR_REG_NULL, 0, ofs, OPSZ_1),
+                 opnd_create_immed_int((ptr_int_t) 0, OPSZ_1)));
+    }
+  }
+
+  // Restore the registers and flags.
+  dr_restore_reg(drcontext, bb, instr, R1, SPILL_SLOT_1);
+  dr_restore_reg(drcontext, bb, instr, R2, SPILL_SLOT_2);
+
+  if (need_to_restore_eflags) {
+    if (VERBOSITY > 1)
+      dr_printf("Restoring eflags\n");
+    // TODO: Check if it's reverse to the dr_restore_reg above and optimize.
+    dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_3);
+    dr_restore_arith_flags_from_xax(drcontext, bb, instr);
+    dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+  }
+
+  // The original instruction is left untouched. The above instrumentation is just
+  // a prefix.
+}
+
+// Inserts, in front of the return instruction `instr`, code that stores zero
+// to the 64-bit slot at FS base + msan_retval_tls_offset, i.e. the TLS slot
+// MemorySanitizer uses for return value shadow.  The application's XAX is
+// preserved around the inserted code; `instr` itself is not modified.
+void InstrumentReturn(void *drcontext, instrlist_t *bb, instr_t *instr) {
+  // XAX is the only scratch register required; park its value in slot 1.
+  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // XAX <- FS segment base.  Clobbers nothing except xax.
+  bool res =
+      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
+  CHECK(res);
+
+  // TODO: unpoison more bytes?
+  opnd_t retval_shadow =
+      OPND_CREATE_MEM64(DR_REG_XAX, msan_retval_tls_offset);
+  PRE(instr, mov_st(drcontext, retval_shadow, OPND_CREATE_INT32(0)));
+
+  // Hand XAX back to the application before the return executes.
+  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+}
+
+// Inserts, in front of the indirect call/jump `instr`, code that zeroes six
+// consecutive pointer-sized slots starting at FS base + msan_param_tls_offset,
+// i.e. the TLS area MemorySanitizer uses for function argument shadow.  The
+// application's XAX is preserved around the inserted code; `instr` itself is
+// not modified.
+void InstrumentIndirectBranch(void *drcontext, instrlist_t *bb,
+                              instr_t *instr) {
+  // XAX is the only scratch register required; park its value in slot 1.
+  dr_save_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+
+  // XAX <- FS segment base.  Clobbers nothing except xax.
+  bool res =
+      dr_insert_get_seg_base(drcontext, bb, instr, DR_SEG_FS, DR_REG_XAX);
+  CHECK(res);
+
+  // TODO: unpoison more bytes?
+  const int kParamSlots = 6;
+  for (int slot = 0; slot < kParamSlots; ++slot) {
+    opnd_t param_shadow = OPND_CREATE_MEMPTR(
+        DR_REG_XAX, msan_param_tls_offset + slot * sizeof(void *));
+    PRE(instr, mov_st(drcontext, param_shadow, OPND_CREATE_INT32(0)));
+  }
+
+  // Hand XAX back to the application before the branch executes.
+  dr_restore_reg(drcontext, bb, instr, DR_REG_XAX, SPILL_SLOT_1);
+}
+
+// Strict ordering of modules by start address, for use with binary search over
+// g_module_list.  Modules shouldn't overlap, so end_ is never consulted; an
+// application where they can overlap is not supported.
+bool ModuleDataCompareStart(const ModuleData &left, const ModuleData &right) {
+  return right.start_ > left.start_;
+}
+
+// Looks up the module containing `pc` in g_module_list (sorted by start_).
+// Should be relatively fast, as it's called for each bb instrumentation.
+//
+// Returns a pointer to the matching entry, or NULL when `pc` does not fall
+// inside any known module -- including when the list is empty or `pc` lies
+// below the first module.
+ModuleData *LookupModuleByPC(app_pc pc) {
+  ModuleData fake_mod_data;
+  fake_mod_data.start_ = pc;
+  // upper_bound yields the first module that starts strictly after pc, so the
+  // only candidate that can contain pc is the element just before it.
+  std::vector<ModuleData>::iterator it =
+      std::upper_bound(g_module_list.begin(), g_module_list.end(),
+                       fake_mod_data, ModuleDataCompareStart);
+  // No module starts at or below pc; stepping back from begin() would be
+  // undefined behaviour.
+  if (it == g_module_list.begin())
+    return NULL;
+  --it;
+  CHECK(it->start_ <= pc);
+  // We're past the end of the closest preceding module, and the next one (if
+  // any) starts after pc, so pc is not in any module.
+  if (pc >= it->end_)
+    return NULL;
+
+  // OK, we found the module.
+  return &*it;
+}
+
+// Policy for code that does not belong to any known module: always instrument.
+bool ShouldInstrumentNonModuleCode() {
+  return true;
+}
+
+// Decides whether a module needs dynamic instrumentation: it does unless it
+// exports the symbol `__msan_track_origins`.
+// NOTE(review): presumably that symbol marks modules already built with MSan
+// -- confirm.
+bool ShouldInstrumentModule(ModuleData *mod_data) {
+  // TODO(rnk): Flags for blacklist would get wired in here.
+  generic_func_t msan_marker =
+      dr_get_proc_address(mod_data->handle_, "__msan_track_origins");
+  return msan_marker == NULL;
+}
+
+// Decides whether code at `pc` should be instrumented.
+//
+// When `pmod_data` is non-NULL it receives the module containing `pc`, or NULL
+// if there is none.  Code inside a module follows that module's
+// should_instrument_ flag (false means the module is on a blacklist); code
+// outside every module follows ShouldInstrumentNonModuleCode().
+bool ShouldInstrumentPc(app_pc pc, ModuleData **pmod_data) {
+  ModuleData *owner = LookupModuleByPC(pc);
+  if (pmod_data != NULL)
+    *pmod_data = owner;
+
+  if (owner == NULL)
+    return ShouldInstrumentNonModuleCode();
+  return owner->should_instrument_;
+}
+
+// app2app basic-block event: for blocks we intend to instrument, expands
+// rep-string instructions via drutil_expand_rep_string().
+// TODO(rnk): Make sure we instrument after __msan_init.
+dr_emit_flags_t
+event_basic_block_app2app(void *drcontext, void *tag, instrlist_t *bb,
+                          bool for_trace, bool translating) {
+  const app_pc pc = dr_fragment_app_pc(tag);
+
+  if (!ShouldInstrumentPc(pc, NULL))
+    return DR_EMIT_PERSISTABLE;
+
+  CHECK(drutil_expand_rep_string(drcontext, bb));
+  return DR_EMIT_PERSISTABLE;
+}
+
+// Main basic-block instrumentation event.
+//
+// For blocks that ShouldInstrumentPc() accepts, walks every instruction and
+//  * prefixes returns with InstrumentReturn(),
+//  * prefixes indirect calls/jumps with InstrumentIndirectBranch(),
+//  * prefixes memory writes with InstrumentMops() for the first interesting
+//    destination operand.
+// Always returns DR_EMIT_PERSISTABLE.  The VERBOSITY-guarded sections only
+// produce log output (and maintain ModuleData::executed_).
+dr_emit_flags_t event_basic_block(void *drcontext, void *tag, instrlist_t *bb,
+                                  bool for_trace, bool translating) {
+  app_pc pc = dr_fragment_app_pc(tag);
+  ModuleData *mod_data;
+
+  const bool wanted = ShouldInstrumentPc(pc, &mod_data);
+  if (!wanted)
+    return DR_EMIT_PERSISTABLE;
+
+  if (VERBOSITY > 1)
+    dr_printf("============================================================\n");
+  if (VERBOSITY > 0) {
+    string mod_path("<no module, JITed?>");
+    if (mod_data)
+      mod_path = mod_data->path_;
+
+    if (mod_data && !mod_data->executed_) {
+      mod_data->executed_ = true; // Never mind this race.
+      dr_printf("Executing from new module: %s\n", mod_path.c_str());
+    }
+    dr_printf("BB to be instrumented: %p [from %s]; translating = %s\n", pc,
+              mod_path.c_str(), translating ? "true" : "false");
+    if (mod_data) {
+      // Match standard sanitizer trace format for free symbols.
+      // #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
+      dr_printf(" #0 %p (%s+%p)\n", pc, mod_data->path_.c_str(),
+                pc - mod_data->start_);
+    }
+  }
+  if (VERBOSITY > 1) {
+    instrlist_disassemble(drcontext, pc, bb, STDOUT);
+    for (instr_t *dumped = instrlist_first(bb); dumped;
+         dumped = instr_get_next(dumped)) {
+      dr_printf("opcode: %d\n", instr_get_opcode(dumped));
+    }
+  }
+
+  for (instr_t *cur = instrlist_first(bb); cur != NULL;
+       cur = instr_get_next(cur)) {
+    const int opcode = instr_get_opcode(cur);
+
+    const bool is_return = (opcode == OP_ret || opcode == OP_ret_far);
+    // These instructions hopefully cover all cases where control is transferred
+    // to a function in a different module (we only care about calls into
+    // compiler-instrumented modules).
+    // * call_ind is used for normal indirect calls.
+    // * jmp_ind is used for indirect tail calls, and calls through PLT (PLT
+    //   stub includes a jump to an address from GOT).
+    const bool is_indirect_transfer =
+        (opcode == OP_call_ind || opcode == OP_call_far_ind ||
+         opcode == OP_jmp_ind || opcode == OP_jmp_far_ind);
+
+    if (is_return) {
+      InstrumentReturn(drcontext, bb, cur);
+    } else if (is_indirect_transfer) {
+      InstrumentIndirectBranch(drcontext, bb, cur);
+    } else if (WantToInstrument(cur)) {
+      if (VERBOSITY > 1) {
+        app_pc orig_pc = dr_fragment_app_pc(tag);
+        uint flags = instr_get_arith_flags(cur);
+        dr_printf("+%d -> to be instrumented! [opcode=%d, flags = 0x%08X]\n",
+                  instr_get_app_pc(cur) - orig_pc, instr_get_opcode(cur),
+                  flags);
+      }
+
+      if (!instr_writes_memory(cur))
+        continue;
+
+      // Instrument memory writes: only the first interesting destination
+      // operand gets instrumented.
+      const int num_dsts = instr_num_dsts(cur);
+      for (int d = 0; d < num_dsts; ++d) {
+        opnd_t dst = instr_get_dst(cur, d);
+        if (OperandIsInteresting(dst)) {
+          InstrumentMops(drcontext, bb, cur, dst, true);
+          break;
+        }
+      }
+    }
+  }
+
+  // TODO: optimize away redundant restore-spill pairs?
+
+  if (VERBOSITY > 1) {
+    pc = dr_fragment_app_pc(tag);
+    dr_printf("\nFinished instrumenting dynamorio_basic_block(PC=" PFX ")\n", pc);
+    instrlist_disassemble(drcontext, pc, bb, STDOUT);
+  }
+  return DR_EMIT_PERSISTABLE;
+}
+
+// Module-load event: records the new module in g_module_list (kept sorted by
+// start address), decides whether it should be instrumented, and passes that
+// decision on to DynamoRIO via dr_module_set_should_instrument().
+void event_module_load(void *drcontext, const module_data_t *info,
+                       bool loaded) {
+  const ModuleData fresh(info);
+
+  // Find the slot that keeps the list ordered, then insert there.
+  std::vector<ModuleData>::iterator slot =
+      std::upper_bound(g_module_list.begin(), g_module_list.end(), fresh,
+                       ModuleDataCompareStart);
+  ModuleData &entry = *g_module_list.insert(slot, fresh);
+
+  // Check if we should instrument this module.
+  entry.should_instrument_ = ShouldInstrumentModule(&entry);
+  dr_module_set_should_instrument(info->handle, entry.should_instrument_);
+
+  if (VERBOSITY > 0) {
+    dr_printf("==DRMSAN== Loaded module: %s [%p...%p], instrumentation is %s\n",
+              info->full_path, info->start, info->end,
+              entry.should_instrument_ ? "on" : "off");
+  }
+}
+
+// Module-unload event: drops the module's entry from g_module_list.  The entry
+// must exist and match the unloaded module's bounds and path exactly,
+// otherwise the CHECK below aborts.
+void event_module_unload(void *drcontext, const module_data_t *info) {
+  if (VERBOSITY > 0) {
+    dr_printf("==DRMSAN== Unloaded module: %s [%p...%p]\n", info->full_path,
+              info->start, info->end);
+  }
+
+  // Locate the entry by start address.
+  ModuleData mod_data(info);
+  std::vector<ModuleData>::iterator it =
+      std::lower_bound(g_module_list.begin(), g_module_list.end(), mod_data,
+                       ModuleDataCompareStart);
+
+  // It's a bug if we didn't actually find the module.
+  CHECK(it != g_module_list.end() && it->start_ == mod_data.start_ &&
+        it->end_ == mod_data.end_ && it->path_ == mod_data.path_);
+
+  // Remove the module from the list.
+  g_module_list.erase(it);
+}
+
+// Process-exit event: only emits a log line when verbose output is enabled.
+void event_exit() {
+  if (VERBOSITY <= 0)
+    return;
+  dr_printf("==DRMSAN== DONE\n");
+}
+
+// Syscall filter: asks DynamoRIO to intercept every system call, regardless of
+// `sysnum`.
+// FIXME: only intercept syscalls with memory effects.
+bool event_filter_syscall(void *drcontext, int sysnum) {
+  const bool intercept = true; /* intercept everything */
+  return intercept;
+}
+
+// drsys_iterate_memargs() callback: unpoisons the shadow of memory written by
+// a system call.
+//
+//   arg        description of one memory argument of the current syscall.
+//   user_data  the drsys_syscall_t* of the current syscall when called from
+//              the post-syscall event (used only for diagnostics); NULL when
+//              called from the pre-syscall event.
+//
+// Only post-syscall OUT arguments are processed; everything else is skipped.
+// Sizes above 0xFFFFFFFF are reported and clipped to their low 32 bits, as the
+// diagnostic message states.  Always returns true so the iteration continues.
+bool drsys_iter_memarg_cb(drsys_arg_t *arg, void *user_data) {
+  CHECK(arg->valid);
+
+  if (arg->pre)
+    return true;
+  if (arg->mode != DRSYS_PARAM_OUT)
+    return true;
+
+  size_t sz = arg->size;
+
+  if (sz > 0xFFFFFFFF) {
+    drmf_status_t res;
+    drsys_syscall_t *syscall = (drsys_syscall_t *)user_data;
+    const char *name;
+    res = drsys_syscall_name(syscall, &name);
+    CHECK(res == DRMF_SUCCESS);
+
+    const size_t clipped = sz & 0xFFFFFFFF;
+    dr_printf("SANITY: syscall '%s' arg %d writes %llu bytes memory?!"
+              " Clipping to %llu.\n",
+              name, arg->ordinal, (unsigned long long) sz,
+              (unsigned long long) clipped);
+    // Actually apply the clipping announced above, instead of zeroing more
+    // than 4GB of shadow on the strength of an implausible size.
+    sz = clipped;
+  }
+
+  // Zero (unpoison) the shadow that corresponds to the written range.
+  void *p = (void *)MEM_TO_SHADOW((ptr_uint_t) arg->start_addr);
+  memset(p, 0, sz);
+
+  return true; /* keep going */
+}
+
+// Pre-syscall event: sanity-checks what DrSyscall reports about the current
+// system call and iterates over its memory arguments.  Every DrSyscall call
+// must succeed, otherwise CHECK aborts.  Always returns true.
+bool event_pre_syscall(void *drcontext, int sysnum) {
+  drmf_status_t res;
+
+  // Handle of the system call being entered.
+  drsys_syscall_t *syscall;
+  res = drsys_cur_syscall(drcontext, &syscall);
+  CHECK(res == DRMF_SUCCESS);
+
+  // DrSyscall must agree with DynamoRIO about the syscall number.
+  drsys_sysnum_t sysnum_full;
+  res = drsys_syscall_number(syscall, &sysnum_full);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(sysnum == sysnum_full.number);
+
+  bool known;
+  res = drsys_syscall_is_known(syscall, &known);
+  CHECK(res == DRMF_SUCCESS);
+
+  const char *name;
+  res = drsys_syscall_name(syscall, &name);
+  CHECK(res == DRMF_SUCCESS);
+
+  // A known syscall must come with a usable return type.
+  drsys_param_type_t ret_type;
+  res = drsys_syscall_return_type(syscall, &ret_type);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(ret_type != DRSYS_TYPE_INVALID);
+  CHECK(!known || ret_type != DRSYS_TYPE_UNKNOWN);
+
+  // No user data here: the callback ignores pre-syscall arguments.
+  res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, NULL);
+  CHECK(res == DRMF_SUCCESS);
+
+  return true;
+}
+
+// Post-syscall event: when the system call succeeded, iterates over its memory
+// arguments so that drsys_iter_memarg_cb() can unpoison what the kernel wrote.
+// Every DrSyscall call must succeed, otherwise CHECK aborts.
+void event_post_syscall(void *drcontext, int sysnum) {
+  drmf_status_t res;
+
+  // Handle of the system call that just returned.
+  drsys_syscall_t *syscall;
+  res = drsys_cur_syscall(drcontext, &syscall);
+  CHECK(res == DRMF_SUCCESS);
+
+  // DrSyscall must agree with DynamoRIO about the syscall number.
+  drsys_sysnum_t sysnum_full;
+  res = drsys_syscall_number(syscall, &sysnum_full);
+  CHECK(res == DRMF_SUCCESS);
+  CHECK(sysnum == sysnum_full.number);
+
+  bool success = false;
+  res = drsys_syscall_succeeded(syscall, dr_syscall_get_result(drcontext),
+                                &success);
+  CHECK(res == DRMF_SUCCESS);
+
+  // A failed syscall gets no shadow updates.
+  if (!success)
+    return;
+
+  // The syscall handle is passed along for the callback's diagnostics.
+  res = drsys_iterate_memargs(drcontext, drsys_iter_memarg_cb, (void *)syscall);
+  CHECK(res == DRMF_SUCCESS);
+}
+
+} // namespace
+
+// DynamoRIO client entry point.
+//
+// Initializes drmgr and drutil, returns early for a fixed list of application
+// names, then sets up DrSyscall, resolves the MSan TLS offsets through the
+// callbacks exported by the application, and registers the syscall,
+// basic-block, module and exit event handlers.
+DR_EXPORT void dr_init(client_id_t id) {
+  drmgr_init();
+  drutil_init();
+
+  // This blacklist will still run these apps through DR's code cache.  On the
+  // other hand, we are able to follow children of these apps.
+  // FIXME: Once DR has detach, we could just detach here.  Alternatively,
+  // if DR had a fork or exec hook to let us decide there, that would be nice.
+  // FIXME: make the blacklist cmd-adjustable.
+  static const char *const kSkippedApps[] = {
+    "python", "python2.7", "bash", "sh", "true", "exit", "yes", "echo"
+  };
+  const string app_name = dr_get_application_name();
+  const size_t num_skipped = sizeof(kSkippedApps) / sizeof(kSkippedApps[0]);
+  for (size_t idx = 0; idx < num_skipped; ++idx) {
+    if (app_name == kSkippedApps[idx])
+      return;
+  }
+
+  // DrSyscall setup: all options zeroed, unknown syscalls are not analyzed.
+  drsys_options_t ops;
+  memset(&ops, 0, sizeof(ops));
+  ops.struct_size = sizeof(ops);
+  ops.analyze_unknown_syscalls = false;
+
+  drmf_status_t res = drsys_init(id, &ops);
+  CHECK(res == DRMF_SUCCESS);
+
+  // Intercept every system call, before and after it runs.
+  dr_register_filter_syscall_event(event_filter_syscall);
+  drmgr_register_pre_syscall_event(event_pre_syscall);
+  drmgr_register_post_syscall_event(event_post_syscall);
+  res = drsys_filter_all_syscalls();
+  CHECK(res == DRMF_SUCCESS);
+
+  InitializeMSanCallbacks();
+
+  // FIXME: the shadow is initialized earlier when DR calls one of our wrapper
+  // functions. This may change one day.
+  // TODO: make this more robust.
+
+  // The offset getters live in the application, so call them while running
+  // with the application's state.
+  void *drcontext = dr_get_current_drcontext();
+  dr_switch_to_app_state(drcontext);
+  msan_retval_tls_offset = __msan_get_retval_tls_offset();
+  msan_param_tls_offset = __msan_get_param_tls_offset();
+  dr_switch_to_dr_state(drcontext);
+
+  if (VERBOSITY > 0) {
+    dr_printf("__msan_retval_tls offset: %d\n", msan_retval_tls_offset);
+    dr_printf("__msan_param_tls offset: %d\n", msan_param_tls_offset);
+  }
+
+  // Standard DR events.
+  dr_register_exit_event(event_exit);
+
+  drmgr_priority_t priority = {
+    sizeof(priority), /* size of struct */
+    "msandr",         /* name of our operation */
+    NULL,             /* optional name of operation we should precede */
+    NULL,             /* optional name of operation we should follow */
+    0                 /* numeric priority */
+  };
+
+  drmgr_register_bb_app2app_event(event_basic_block_app2app, &priority);
+  drmgr_register_bb_instru2instru_event(event_basic_block, &priority);
+  drmgr_register_module_load_event(event_module_load);
+  drmgr_register_module_unload_event(event_module_unload);
+
+  if (VERBOSITY > 0)
+    dr_printf("==MSANDR== Starting!\n");
+}

Propchange: compiler-rt/trunk/lib/msandr/msandr.cc
------------------------------------------------------------------------------
    svn:eol-style = LF





More information about the llvm-commits mailing list