[compiler-rt] r288537 - [sanitizer] Track architecture and UUID of modules in LoadedModule

Kuba Mracek via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 2 13:27:14 PST 2016


Author: kuba.brecka
Date: Fri Dec  2 15:27:14 2016
New Revision: 288537

URL: http://llvm.org/viewvc/llvm-project?rev=288537&view=rev
Log:
[sanitizer] Track architecture and UUID of modules in LoadedModule

When we enumerate loaded modules, we only track the module name and base address, which causes several problems on macOS. Dylibs and executables often have several architecture slices, and not storing which architecture/UUID is actually loaded creates problems with symbolication: a file path + offset isn't enough to symbolicate correctly, since the offset can be valid in multiple slices. This is especially common on Haswell+ x86_64 machines, where x86_64h slices are preferred, but if one is not available, a regular x86_64 slice is loaded instead. The same issue exists for i386 vs. x86_64 as well.

This patch adds tracking of the arch and UUID for each LoadedModule. At this point, this information isn't used in reports, but this is the first step. The goal is to correctly identify which slice is loaded during symbolication, and also to output this information in reports so that we can tell exactly which slices were loaded in post-mortem analysis.

Differential Revision: https://reviews.llvm.org/D26632


Modified:
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.cc
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps.h
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_linux.cc
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_mac.cc
    compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.cc?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.cc Fri Dec  2 15:27:14 2016
@@ -259,9 +259,18 @@ void LoadedModule::set(const char *modul
   base_address_ = base_address;
 }
 
+void LoadedModule::set(const char *module_name, uptr base_address,
+                       ModuleArch arch, u8 uuid[kModuleUUIDSize]) {
+  set(module_name, base_address);
+  arch_ = arch;
+  internal_memcpy(uuid_, uuid, sizeof(uuid_));
+}
+
 void LoadedModule::clear() {
   InternalFree(full_name_);
   full_name_ = nullptr;
+  arch_ = kModuleArchUnknown;
+  internal_memset(uuid_, 0, kModuleUUIDSize);
   while (!ranges_.empty()) {
     AddressRange *r = ranges_.front();
     ranges_.pop_front();

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_common.h Fri Dec  2 15:27:14 2016
@@ -646,18 +646,40 @@ uptr InternalLowerBound(const Container
   return first;
 }
 
+enum ModuleArch {
+  kModuleArchUnknown,
+  kModuleArchI386,
+  kModuleArchX86_64,
+  kModuleArchX86_64H,
+  kModuleArchARMV6,
+  kModuleArchARMV7,
+  kModuleArchARMV7S,
+  kModuleArchARMV7K,
+  kModuleArchARM64
+};
+
+const uptr kModuleUUIDSize = 16;
+
 // Represents a binary loaded into virtual memory (e.g. this can be an
 // executable or a shared object).
 class LoadedModule {
  public:
-  LoadedModule() : full_name_(nullptr), base_address_(0) { ranges_.clear(); }
+  LoadedModule()
+      : full_name_(nullptr), base_address_(0), arch_(kModuleArchUnknown) {
+    internal_memset(uuid_, 0, kModuleUUIDSize);
+    ranges_.clear();
+  }
   void set(const char *module_name, uptr base_address);
+  void set(const char *module_name, uptr base_address, ModuleArch arch,
+           u8 uuid[kModuleUUIDSize]);
   void clear();
   void addAddressRange(uptr beg, uptr end, bool executable);
   bool containsAddress(uptr address) const;
 
   const char *full_name() const { return full_name_; }
   uptr base_address() const { return base_address_; }
+  ModuleArch arch() const { return arch_; }
+  const u8 *uuid() const { return uuid_; }
 
   struct AddressRange {
     AddressRange *next;
@@ -674,6 +696,8 @@ class LoadedModule {
  private:
   char *full_name_;  // Owned.
   uptr base_address_;
+  ModuleArch arch_;
+  u8 uuid_[kModuleUUIDSize];
   IntrusiveList<AddressRange> ranges_;
 };
 

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps.h?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps.h (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps.h Fri Dec  2 15:27:14 2016
@@ -35,8 +35,9 @@ class MemoryMappingLayout {
  public:
   explicit MemoryMappingLayout(bool cache_enabled);
   ~MemoryMappingLayout();
-  bool Next(uptr *start, uptr *end, uptr *offset,
-            char filename[], uptr filename_size, uptr *protection);
+  bool Next(uptr *start, uptr *end, uptr *offset, char filename[],
+            uptr filename_size, uptr *protection, ModuleArch *arch = nullptr,
+            u8 *uuid = nullptr);
   void Reset();
   // In some cases, e.g. when running under a sandbox on Linux, ASan is unable
   // to obtain the memory mappings. It should fall back to pre-cached data
@@ -65,13 +66,15 @@ class MemoryMappingLayout {
   static ProcSelfMapsBuff cached_proc_self_maps_;
   static StaticSpinMutex cache_lock_;  // protects cached_proc_self_maps_.
 # elif SANITIZER_MAC
-  template<u32 kLCSegment, typename SegmentCommand>
-  bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
-                       char filename[], uptr filename_size,
+  template <u32 kLCSegment, typename SegmentCommand>
+  bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, char filename[],
+                       uptr filename_size, ModuleArch *arch, u8 *uuid,
                        uptr *protection);
   int current_image_;
   u32 current_magic_;
   u32 current_filetype_;
+  ModuleArch current_arch_;
+  u8 current_uuid_[kModuleUUIDSize];
   int current_load_cmd_count_;
   char *current_load_cmd_addr_;
 # endif

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_freebsd.cc Fri Dec  2 15:27:14 2016
@@ -50,7 +50,9 @@ void ReadProcMaps(ProcSelfMapsBuff *proc
 
 bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
                                char filename[], uptr filename_size,
-                               uptr *protection) {
+                               uptr *protection, ModuleArch *arch, u8 *uuid) {
+  CHECK(!arch && "not implemented");
+  CHECK(!uuid && "not implemented");
   char *last = proc_self_maps_.data + proc_self_maps_.len;
   if (current_ >= last) return false;
   uptr dummy;

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_linux.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_linux.cc?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_linux.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_linux.cc Fri Dec  2 15:27:14 2016
@@ -28,7 +28,9 @@ static bool IsOneOf(char c, char c1, cha
 
 bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
                                char filename[], uptr filename_size,
-                               uptr *protection) {
+                               uptr *protection, ModuleArch *arch, u8 *uuid) {
+  CHECK(!arch && "not implemented");
+  CHECK(!uuid && "not implemented");
   char *last = proc_self_maps_.data + proc_self_maps_.len;
   if (current_ >= last) return false;
   uptr dummy;

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_mac.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_mac.cc?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_mac.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_procmaps_mac.cc Fri Dec  2 15:27:14 2016
@@ -53,6 +53,8 @@ void MemoryMappingLayout::Reset() {
   current_load_cmd_addr_ = 0;
   current_magic_ = 0;
   current_filetype_ = 0;
+  current_arch_ = kModuleArchUnknown;
+  internal_memset(current_uuid_, 0, kModuleUUIDSize);
 }
 
 // static
@@ -71,11 +73,12 @@ void MemoryMappingLayout::LoadFromCache(
 // and returns the start and end addresses and file offset of the corresponding
 // segment.
 // Note that the segment addresses are not necessarily sorted.
-template<u32 kLCSegment, typename SegmentCommand>
-bool MemoryMappingLayout::NextSegmentLoad(
-    uptr *start, uptr *end, uptr *offset,
-    char filename[], uptr filename_size, uptr *protection) {
-  const char* lc = current_load_cmd_addr_;
+template <u32 kLCSegment, typename SegmentCommand>
+bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
+                                          char filename[], uptr filename_size,
+                                          ModuleArch *arch, u8 *uuid,
+                                          uptr *protection) {
+  const char *lc = current_load_cmd_addr_;
   current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize;
   if (((const load_command *)lc)->cmd == kLCSegment) {
     const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_);
@@ -97,14 +100,61 @@ bool MemoryMappingLayout::NextSegmentLoa
       internal_strncpy(filename, _dyld_get_image_name(current_image_),
                        filename_size);
     }
+    if (arch) {
+      *arch = current_arch_;
+    }
+    if (uuid) {
+      internal_memcpy(uuid, current_uuid_, kModuleUUIDSize);
+    }
     return true;
   }
   return false;
 }
 
+ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
+  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
+  switch (cputype) {
+    case CPU_TYPE_I386:
+      return kModuleArchI386;
+    case CPU_TYPE_X86_64:
+      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
+      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
+      CHECK(0 && "Invalid subtype of x86_64");
+      return kModuleArchUnknown;
+    case CPU_TYPE_ARM:
+      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
+      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
+      CHECK(0 && "Invalid subtype of ARM");
+      return kModuleArchUnknown;
+    case CPU_TYPE_ARM64:
+      return kModuleArchARM64;
+    default:
+      CHECK(0 && "Invalid CPU type");
+      return kModuleArchUnknown;
+  }
+}
+
+static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
+  const load_command *current_lc = first_lc;
+  while (1) {
+    if (current_lc->cmd == 0) return;
+    if (current_lc->cmd == LC_UUID) {
+      const uuid_command *uuid_lc = (const uuid_command *)current_lc;
+      const uint8_t *uuid = &uuid_lc->uuid[0];
+      internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
+      return;
+    }
+
+    current_lc =
+        (const load_command *)(((char *)current_lc) + current_lc->cmdsize);
+  }
+}
+
 bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
                                char filename[], uptr filename_size,
-                               uptr *protection) {
+                               uptr *protection, ModuleArch *arch, u8 *uuid) {
   for (; current_image_ >= 0; current_image_--) {
     const mach_header* hdr = _dyld_get_image_header(current_image_);
     if (!hdr) continue;
@@ -113,6 +163,7 @@ bool MemoryMappingLayout::Next(uptr *sta
       current_load_cmd_count_ = hdr->ncmds;
       current_magic_ = hdr->magic;
       current_filetype_ = hdr->filetype;
+      current_arch_ = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
       switch (current_magic_) {
 #ifdef MH_MAGIC_64
         case MH_MAGIC_64: {
@@ -130,20 +181,24 @@ bool MemoryMappingLayout::Next(uptr *sta
       }
     }
 
+    FindUUID((const load_command *)current_load_cmd_addr_, &current_uuid_[0]);
+
     for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) {
       switch (current_magic_) {
         // current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64.
 #ifdef MH_MAGIC_64
         case MH_MAGIC_64: {
           if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
-                  start, end, offset, filename, filename_size, protection))
+                  start, end, offset, filename, filename_size, arch, uuid,
+                  protection))
             return true;
           break;
         }
 #endif
         case MH_MAGIC: {
           if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
-                  start, end, offset, filename, filename_size, protection))
+                  start, end, offset, filename, filename_size, arch, uuid,
+                  protection))
             return true;
           break;
         }
@@ -159,9 +214,11 @@ void MemoryMappingLayout::DumpListOfModu
     InternalMmapVector<LoadedModule> *modules) {
   Reset();
   uptr cur_beg, cur_end, prot;
+  ModuleArch cur_arch;
+  u8 cur_uuid[kModuleUUIDSize];
   InternalScopedString module_name(kMaxPathLength);
   for (uptr i = 0; Next(&cur_beg, &cur_end, 0, module_name.data(),
-                        module_name.size(), &prot);
+                        module_name.size(), &prot, &cur_arch, &cur_uuid[0]);
        i++) {
     const char *cur_name = module_name.data();
     if (cur_name[0] == '\0')
@@ -173,7 +230,7 @@ void MemoryMappingLayout::DumpListOfModu
     } else {
       modules->push_back(LoadedModule());
       cur_module = &modules->back();
-      cur_module->set(cur_name, cur_beg);
+      cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid);
     }
     cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
   }

Modified: compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc?rev=288537&r1=288536&r2=288537&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_procmaps_test.cc Fri Dec  2 15:27:14 2016
@@ -52,5 +52,26 @@ TEST(MemoryMappingLayout, DumpListOfModu
   EXPECT_TRUE(found);
 }
 
+TEST(MemoryMapping, LoadedModuleArchAndUUID) {
+  if (SANITIZER_MAC) {
+    MemoryMappingLayout memory_mapping(false);
+    const uptr kMaxModules = 100;
+    InternalMmapVector<LoadedModule> modules(kMaxModules);
+    memory_mapping.DumpListOfModules(&modules);
+    for (uptr i = 0; i < modules.size(); ++i) {
+      ModuleArch arch = modules[i].arch();
+      // Darwin unit tests are only run on i386/x86_64/x86_64h.
+      if (SANITIZER_WORDSIZE == 32) {
+        EXPECT_EQ(arch, kModuleArchI386);
+      } else if (SANITIZER_WORDSIZE == 64) {
+        EXPECT_TRUE(arch == kModuleArchX86_64 || arch == kModuleArchX86_64H);
+      }
+      const u8 *uuid = modules[i].uuid();
+      u8 null_uuid[kModuleUUIDSize] = {0};
+      EXPECT_NE(memcmp(null_uuid, uuid, kModuleUUIDSize), 0);
+    }
+  }
+}
+
 }  // namespace __sanitizer
 #endif  // !defined(_WIN32)




More information about the llvm-commits mailing list