[compiler-rt] r222535 - [ASan] Allow the users of SymbolizationLoop to make use of the --dsym_hint option in llvm-symbolizer

Alexander Potapenko glider at google.com
Fri Nov 21 06:12:00 PST 2014


Author: glider
Date: Fri Nov 21 08:12:00 2014
New Revision: 222535

URL: http://llvm.org/viewvc/llvm-project?rev=222535&view=rev
Log:
[ASan] Allow the users of SymbolizationLoop to make use of the --dsym_hint option in llvm-symbolizer

Let the users of SymbolizationLoop define a function that produces the list of .dSYM hints (possible path to the .dSYM bundle) for the given binary.
Because the hints can't be added to an existing llvm-symbolizer process, we spawn a new symbolizer process ones each time a new hint appears.
Those can only appear for binaries that we haven't seen before.

Modified:
    compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py

Modified: compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py?rev=222535&r1=222534&r2=222535&view=diff
==============================================================================
--- compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py (original)
+++ compiler-rt/trunk/lib/asan/scripts/asan_symbolize.py Fri Nov 21 08:12:00 2014
@@ -66,10 +66,12 @@ class Symbolizer(object):
 
 
 class LLVMSymbolizer(Symbolizer):
-  def __init__(self, symbolizer_path, addr):
+  def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]):
     super(LLVMSymbolizer, self).__init__()
     self.symbolizer_path = symbolizer_path
-    self.default_arch = guess_arch(addr)
+    self.default_arch = default_arch
+    self.system = system
+    self.dsym_hints = dsym_hints
     self.pipe = self.open_llvm_symbolizer()
 
   def open_llvm_symbolizer(self):
@@ -79,6 +81,9 @@ class LLVMSymbolizer(Symbolizer):
            '--functions=short',
            '--inlining=true',
            '--default-arch=%s' % self.default_arch]
+    if self.system == 'Darwin':
+      for hint in self.dsym_hints:
+        cmd.append('--dsym-hint=%s' % hint)
     if DEBUG:
       print ' '.join(cmd)
     try:
@@ -94,7 +99,7 @@ class LLVMSymbolizer(Symbolizer):
       return None
     result = []
     try:
-      symbolizer_input = '%s %s' % (binary, offset)
+      symbolizer_input = '"%s" %s' % (binary, offset)
       if DEBUG:
         print symbolizer_input
       print >> self.pipe.stdin, symbolizer_input
@@ -116,14 +121,14 @@ class LLVMSymbolizer(Symbolizer):
     return result
 
 
-def LLVMSymbolizerFactory(system, addr):
+def LLVMSymbolizerFactory(system, default_arch, dsym_hints=[]):
   symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
   if not symbolizer_path:
     symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
     if not symbolizer_path:
       # Assume llvm-symbolizer is in PATH.
       symbolizer_path = 'llvm-symbolizer'
-  return LLVMSymbolizer(symbolizer_path, addr)
+  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
 
 
 class Addr2LineSymbolizer(Symbolizer):
@@ -335,26 +340,49 @@ class BreakpadSymbolizer(Symbolizer):
 
 
 class SymbolizationLoop(object):
-  def __init__(self, binary_name_filter=None):
+  def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
     # Used by clients who may want to supply a different binary name.
     # E.g. in Chrome several binaries may share a single .dSYM.
     self.binary_name_filter = binary_name_filter
+    self.dsym_hint_producer = dsym_hint_producer
     self.system = os.uname()[0]
     if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
       raise Exception('Unknown system')
-    self.llvm_symbolizer = None
+    self.llvm_symbolizers = {}
+    self.last_llvm_symbolizer = None
+    self.dsym_hints = set([])
     self.frame_no = 0
 
   def symbolize_address(self, addr, binary, offset):
-    # Initialize llvm-symbolizer lazily.
-    if not self.llvm_symbolizer:
-      self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr)
+    # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
+    # a single symbolizer binary.
+    # On Darwin, if the dsym hint producer is present:
+    #  1. check whether we've seen this binary already; if so,
+    #     use |llvm_symbolizers[binary]|, which has already loaded the debug
+    #     info for this binary (might not be the case for
+    #     |last_llvm_symbolizer|);
+    #  2. otherwise check if we've seen all the hints for this binary already;
+    #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
+    #  3. otherwise create a new symbolizer and pass all currently known
+    #     .dSYM hints to it.
+    if not binary in self.llvm_symbolizers:
+      use_last_symbolizer = True
+      if self.system == 'Darwin' and self.dsym_hint_producer:
+        dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
+        use_last_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
+        self.dsym_hints |= dsym_hints_for_binary
+      if self.last_llvm_symbolizer and use_last_symbolizer:
+          self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
+      else:
+        self.last_llvm_symbolizer = LLVMSymbolizerFactory(
+            self.system, guess_arch(addr), self.dsym_hints)
+        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
     # Use the chain of symbolizers:
     # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
     # (fall back to next symbolizer if the previous one fails).
     if not binary in symbolizers:
       symbolizers[binary] = ChainSymbolizer(
-          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
+          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
     result = symbolizers[binary].symbolize(addr, binary, offset)
     if result is None:
       # Initialize system symbolizer only if other symbolizers failed.





More information about the llvm-commits mailing list