[compiler-rt] r243895 - [asan] Print VAs instead of RVAs for module offsets on Windows

Reid Kleckner reid at kleckner.net
Mon Aug 3 12:51:18 PDT 2015


Author: rnk
Date: Mon Aug  3 14:51:18 2015
New Revision: 243895

URL: http://llvm.org/viewvc/llvm-project?rev=243895&view=rev
Log:
[asan] Print VAs instead of RVAs for module offsets on Windows

Summary:
This is consistent with binutils and ASan behavior on other platforms,
and makes it easier to use llvm-symbolizer with WinASan. The
--relative-address flag to llvm-symbolizer is also no longer needed.

An RVA is a "relative virtual address", meaning it is the address of
something inside the image minus the base of the mapping at runtime.

A VA in this context is an RVA plus the "preferred base" of the module,
and not a real runtime address. The real runtime address of a symbol
will equal the VA iff the module is loaded at its preferred base at
runtime.

On Windows, the preferred base is stored in the ImageBase field of one
of the PE file header, and this change adds the necessary code to
extract it. On Linux, this offset is typically included in program and
section headers of executables.

ELF shared objects typically use a preferred base of zero, meaning the
smallest p_vaddr field in the program headers is zero. This makes it so
that PIC and PIE module offsets come out looking like RVAs, but they're
actually VAs. The difference between them simply happens to be zero.

Reviewers: samsonov, majnemer

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D11681

Added:
    compiler-rt/trunk/test/asan/TestCases/Windows/unsymbolized.cc
Modified:
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_win.cc
    compiler-rt/trunk/test/asan/lit.cfg

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h?rev=243895&r1=243894&r2=243895&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h Mon Aug  3 14:51:18 2015
@@ -224,6 +224,13 @@ bool WriteToFile(fd_t fd, const void *bu
 bool RenameFile(const char *oldpath, const char *newpath,
                 error_t *error_p = nullptr);
 
+// Scoped file handle closer.
+struct FileCloser {
+  explicit FileCloser(fd_t fd) : fd(fd) {}
+  ~FileCloser() { CloseFile(fd); }
+  fd_t fd;
+};
+
 bool SupportsColoredOutput(fd_t fd);
 
 // Opens the file 'file_name" and reads up to 'max_len' bytes.

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_win.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_win.cc?rev=243895&r1=243894&r2=243895&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_win.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_win.cc Mon Aug  3 14:51:18 2015
@@ -323,6 +323,58 @@ void Abort() {
   internal__exit(3);
 }
 
+// Read the file to extract the ImageBase field from the PE header. If ASLR is
+// disabled and this virtual address is available, the loader will typically
+// load the image at this address. Therefore, we call it the preferred base. Any
+// addresses in the DWARF typically assume that the object has been loaded at
+// this address.
+static uptr GetPreferredBase(const char *modname) {
+  fd_t fd = OpenFile(modname, RdOnly, nullptr);
+  if (fd == kInvalidFd)
+    return 0;
+  FileCloser closer(fd);
+
+  // Read just the DOS header.
+  IMAGE_DOS_HEADER dos_header;
+  uptr bytes_read;
+  if (!ReadFromFile(fd, &dos_header, sizeof(dos_header), &bytes_read) ||
+      bytes_read != sizeof(dos_header))
+    return 0;
+
+  // The file should start with the right signature.
+  if (dos_header.e_magic != IMAGE_DOS_SIGNATURE)
+    return 0;
+
+  // The layout at e_lfanew is:
+  // "PE\0\0"
+  // IMAGE_FILE_HEADER
+  // IMAGE_OPTIONAL_HEADER
+  // Seek to e_lfanew and read all that data.
+  char buf[4 + sizeof(IMAGE_FILE_HEADER) + sizeof(IMAGE_OPTIONAL_HEADER)];
+  if (::SetFilePointer(fd, dos_header.e_lfanew, nullptr, FILE_BEGIN) ==
+      INVALID_SET_FILE_POINTER)
+    return 0;
+  if (!ReadFromFile(fd, &buf[0], sizeof(buf), &bytes_read) ||
+      bytes_read != sizeof(buf))
+    return 0;
+
+  // Check for "PE\0\0" before the PE header.
+  char *pe_sig = &buf[0];
+  if (internal_memcmp(pe_sig, "PE\0\0", 4) != 0)
+    return 0;
+
+  // Skip over IMAGE_FILE_HEADER. We could do more validation here if we wanted.
+  IMAGE_OPTIONAL_HEADER *pe_header =
+      (IMAGE_OPTIONAL_HEADER *)(pe_sig + 4 + sizeof(IMAGE_FILE_HEADER));
+
+  // Check for more magic in the PE header.
+  if (pe_header->Magic != IMAGE_NT_OPTIONAL_HDR_MAGIC)
+    return 0;
+
+  // Finally, return the ImageBase.
+  return (uptr)pe_header->ImageBase;
+}
+
 uptr GetListOfModules(LoadedModule *modules, uptr max_modules,
                       string_predicate_t filter) {
   HANDLE cur_process = GetCurrentProcess();
@@ -355,19 +407,33 @@ uptr GetListOfModules(LoadedModule *modu
     if (!GetModuleInformation(cur_process, handle, &mi, sizeof(mi)))
       continue;
 
-    char module_name[MAX_PATH];
-    bool got_module_name =
-        GetModuleFileNameA(handle, module_name, sizeof(module_name));
-    if (!got_module_name)
-      module_name[0] = '\0';
+    // Get the UTF-16 path and convert to UTF-8.
+    wchar_t modname_utf16[kMaxPathLength];
+    int modname_utf16_len =
+        GetModuleFileNameW(handle, modname_utf16, kMaxPathLength);
+    if (modname_utf16_len == 0)
+      modname_utf16[0] = '\0';
+    char module_name[kMaxPathLength];
+    int module_name_len =
+        ::WideCharToMultiByte(CP_UTF8, 0, modname_utf16, modname_utf16_len + 1,
+                              &module_name[0], kMaxPathLength, NULL, NULL);
+    module_name[module_name_len] = '\0';
 
     if (filter && !filter(module_name))
       continue;
 
     uptr base_address = (uptr)mi.lpBaseOfDll;
     uptr end_address = (uptr)mi.lpBaseOfDll + mi.SizeOfImage;
+
+    // Adjust the base address of the module so that we get a VA instead of an
+    // RVA when computing the module offset. This helps llvm-symbolizer find the
+    // right DWARF CU. In the common case that the image is loaded at it's
+    // preferred address, we will now print normal virtual addresses.
+    uptr preferred_base = GetPreferredBase(&module_name[0]);
+    uptr adjusted_base = base_address - preferred_base;
+
     LoadedModule *cur_module = &modules[count];
-    cur_module->set(module_name, base_address);
+    cur_module->set(module_name, adjusted_base);
     // We add the whole module as one single address range.
     cur_module->addAddressRange(base_address, end_address, /*executable*/ true);
     count++;
@@ -402,10 +468,17 @@ static __declspec(allocate(".CRT$XID"))
 
 // ------------------ sanitizer_libc.h
 fd_t OpenFile(const char *filename, FileAccessMode mode, error_t *last_error) {
-  if (mode != WrOnly)
+  fd_t res;
+  if (mode == RdOnly) {
+    res = CreateFile(filename, GENERIC_READ,
+                     FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+                     nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
+  } else if (mode == WrOnly) {
+    res = CreateFile(filename, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS,
+                     FILE_ATTRIBUTE_NORMAL, nullptr);
+  } else {
     UNIMPLEMENTED();
-  fd_t res = CreateFile(filename, GENERIC_WRITE, 0, nullptr, CREATE_ALWAYS,
-                        FILE_ATTRIBUTE_NORMAL, nullptr);
+  }
   CHECK(res != kStdoutFd || kStdoutFd == kInvalidFd);
   CHECK(res != kStderrFd || kStderrFd == kInvalidFd);
   if (res == kInvalidFd && last_error)
@@ -419,7 +492,11 @@ void CloseFile(fd_t fd) {
 
 bool ReadFromFile(fd_t fd, void *buff, uptr buff_size, uptr *bytes_read,
                   error_t *error_p) {
-  UNIMPLEMENTED();
+  CHECK(fd != kInvalidFd);
+  bool success = ::ReadFile(fd, buff, buff_size, bytes_read, nullptr);
+  if (!success && error_p)
+    *error_p = GetLastError();
+  return success;
 }
 
 bool SupportsColoredOutput(fd_t fd) {

Added: compiler-rt/trunk/test/asan/TestCases/Windows/unsymbolized.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/asan/TestCases/Windows/unsymbolized.cc?rev=243895&view=auto
==============================================================================
--- compiler-rt/trunk/test/asan/TestCases/Windows/unsymbolized.cc (added)
+++ compiler-rt/trunk/test/asan/TestCases/Windows/unsymbolized.cc Mon Aug  3 14:51:18 2015
@@ -0,0 +1,25 @@
+// When we link a binary without the -debug flag, ASan should print out VAs
+// instead of RVAs. The frames for main and do_uaf should be above 0x400000,
+// which is the default image base of an executable.
+
+// RUN: rm -f %t.pdb
+// RUN: %clangxx_asan -c -O2 %s -o %t.obj
+// RUN: link /nologo /OUT:%t.exe %t.obj %asan_lib %asan_cxx_lib
+// RUN: not %run %t.exe 2>&1 | FileCheck %s
+
+#include <stdlib.h>
+#include <stdio.h>
+int __attribute__((noinline)) do_uaf(void);
+int main() {
+  int r = do_uaf();
+  printf("r: %d\n", r);
+  return r;
+}
+int do_uaf(void) {
+  char *x = (char*)malloc(10 * sizeof(char));
+  free(x);
+  return x[5];
+  // CHECK: AddressSanitizer: heap-use-after-free
+  // CHECK: #0 {{0x[a-f0-9]+ \(.*[\\/]unsymbolized.cc.*.exe\+0x40[a-f0-9]{4}\)}}
+  // CHECK: #1 {{0x[a-f0-9]+ \(.*[\\/]unsymbolized.cc.*.exe\+0x40[a-f0-9]{4}\)}}
+}

Modified: compiler-rt/trunk/test/asan/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/asan/lit.cfg?rev=243895&r1=243894&r2=243895&view=diff
==============================================================================
--- compiler-rt/trunk/test/asan/lit.cfg (original)
+++ compiler-rt/trunk/test/asan/lit.cfg Mon Aug  3 14:51:18 2015
@@ -113,8 +113,10 @@ if platform.system() == 'Windows':
   clang_invocation = build_invocation(clang_cl_asan_cxxflags)
   clang_cl_invocation = clang_invocation.replace("clang.exe","clang-cl.exe")
   config.substitutions.append( ("%clang_cl_asan ", clang_cl_invocation) )
-  config.substitutions.append( ("%asan_dll_thunk",
-                               os.path.join(config.compiler_rt_libdir, "clang_rt.asan_dll_thunk-i386.lib")))
+  base_lib = os.path.join(config.compiler_rt_libdir, "clang_rt.asan%%s-%s.lib" % config.target_arch)
+  config.substitutions.append( ("%asan_lib", base_lib % "") )
+  config.substitutions.append( ("%asan_cxx_lib", base_lib % "_cxx") )
+  config.substitutions.append( ("%asan_dll_thunk", base_lib % "_dll_thunk") )
 
 # FIXME: De-hardcode this path.
 asan_source_dir = os.path.join(





More information about the llvm-commits mailing list