[Lldb-commits] [lldb] 4b84682 - [crashlog] Move crash log parsing into its own class

Jonas Devlieghere via lldb-commits lldb-commits at lists.llvm.org
Tue Nov 3 09:04:42 PST 2020

Author: Jonas Devlieghere
Date: 2020-11-03T09:04:35-08:00
New Revision: 4b846820445ef33a099a19b5df983ed2f9d6e067

URL: https://github.com/llvm/llvm-project/commit/4b846820445ef33a099a19b5df983ed2f9d6e067
DIFF: https://github.com/llvm/llvm-project/commit/4b846820445ef33a099a19b5df983ed2f9d6e067.diff

LOG: [crashlog] Move crash log parsing into its own class

Move crash log parsing out of the CrashLog class and into a dedicated
CrashLogParser class, and add more tests.

Differential revision: https://reviews.llvm.org/D90664




diff  --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
index 85387756ce18..373065f165ca 100755
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -71,41 +71,7 @@ def read_plist(s):
         return plistlib.readPlistFromString(s)
-class CrashLogParseMode:
-    NORMAL = 0
-    THREAD = 1
-    IMAGES = 2
-    THREGS = 3
-    SYSTEM = 4
-    INSTRS = 5
 class CrashLog(symbolication.Symbolicator):
-    """Class that does parses darwin crash logs"""
-    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
-    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
-    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
-    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
-    app_backtrace_regex = re.compile(
-        '^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
-    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
-    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
-                             r'+(.+?)'    r'\s+'               # img_name
-                             r'(' +version+ r')?'              # img_version
-                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
-                             r' +(.*)'                         # offs
-                            )
-    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
-    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
-                                  r'\s+' '-' r'\s+'              #   -
-                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
-                                  r'[+]?(.+?)'            r'\s+' # img_name
-                                  r'(' +version+ ')?'            # img_version
-                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
-                                  r'(/.*)'                       # img_path
-                                 )
-    empty_line_regex = re.compile('^$')
     class Thread:
         """Class that represents a thread in a darwin crash log"""
@@ -355,88 +321,174 @@ def __init__(self, path, verbose):
         self.idents = list()  # A list of the required identifiers for doing all stack backtraces
         self.crashed_thread_idx = -1
         self.version = -1
-        self.error = None
         self.target = None
         self.verbose = verbose
-        # With possible initial component of ~ or ~user replaced by that user's
-        # home directory.
-        try:
-            f = open(self.path)
-        except IOError:
-            self.error = 'error: cannot open "%s"' % self.path
-            return
-        self.file_lines = f.read().splitlines()
-        parse_mode = CrashLogParseMode.NORMAL
-        thread = None
-        app_specific_backtrace = False
-        for line in self.file_lines:
-            # print line
+    def dump(self):
+        print("Crash Log File: %s" % (self.path))
+        if self.backtraces:
+            print("\nApplication Specific Backtraces:")
+            for thread in self.backtraces:
+                thread.dump('  ')
+        print("\nThreads:")
+        for thread in self.threads:
+            thread.dump('  ')
+        print("\nImages:")
+        for image in self.images:
+            image.dump('  ')
+    def find_image_with_identifier(self, identifier):
+        for image in self.images:
+            if image.identifier == identifier:
+                return image
+        regex_text = '^.*\.%s$' % (re.escape(identifier))
+        regex = re.compile(regex_text)
+        for image in self.images:
+            if regex.match(image.identifier):
+                return image
+        return None
+    def create_target(self):
+        if self.target is None:
+            self.target = symbolication.Symbolicator.create_target(self)
+            if self.target:
+                return self.target
+            # We weren't able to open the main executable as, but we can still
+            # symbolicate
+            print('crashlog.create_target()...2')
+            if self.idents:
+                for ident in self.idents:
+                    image = self.find_image_with_identifier(ident)
+                    if image:
+                        self.target = image.create_target()
+                        if self.target:
+                            return self.target  # success
+            print('crashlog.create_target()...3')
+            for image in self.images:
+                self.target = image.create_target()
+                if self.target:
+                    return self.target  # success
+            print('crashlog.create_target()...4')
+            print('error: Unable to locate any executables from the crash log.')
+            print('       Try loading the executable into lldb before running crashlog')
+            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
+        return self.target
+    def get_target(self):
+        return self.target
+class CrashLogParseMode:
+    NORMAL = 0
+    THREAD = 1
+    IMAGES = 2
+    THREGS = 3
+    SYSTEM = 4
+    INSTRS = 5
+class CrashLogParser:
+    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
+    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
+    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
+    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
+    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
+    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
+    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
+                             r'+(.+?)'    r'\s+'               # img_name
+                             r'(' +version+ r')?'              # img_version
+                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
+                             r' +(.*)'                         # offs
+                            )
+    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
+    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
+                                  r'\s+' '-' r'\s+'              #   -
+                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
+                                  r'[+]?(.+?)'            r'\s+' # img_name
+                                  r'(' +version+ ')?'            # img_version
+                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
+                                  r'(/.*)'                       # img_path
+                                 )
+    def __init__(self, path, verbose):
+        self.path = os.path.expanduser(path)
+        self.verbose = verbose
+        self.parse_mode = CrashLogParseMode.NORMAL
+        self.thread = None
+        self.app_specific_backtrace = False
+        self.crashlog = CrashLog(self.path, self.verbose)
+    def parse(self):
+        with open(self.path,'r') as f:
+            lines = f.read().splitlines()
+        for line in lines:
             line_len = len(line)
             if line_len == 0:
-                if thread:
-                    if parse_mode == CrashLogParseMode.THREAD:
-                        if thread.index == self.crashed_thread_idx:
-                            thread.reason = ''
-                            if self.thread_exception:
-                                thread.reason += self.thread_exception
-                            if self.thread_exception_data:
-                                thread.reason += " (%s)" % self.thread_exception_data
-                        if app_specific_backtrace:
-                            self.backtraces.append(thread)
+                if self.thread:
+                    if self.parse_mode == CrashLogParseMode.THREAD:
+                        if self.thread.index == self.crashlog.crashed_thread_idx:
+                            self.thread.reason = ''
+                            if self.crashlog.thread_exception:
+                                self.thread.reason += self.crashlog.thread_exception
+                            if self.crashlog.thread_exception_data:
+                                self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
+                        if self.app_specific_backtrace:
+                            self.crashlog.backtraces.append(self.thread)
-                            self.threads.append(thread)
-                    thread = None
+                            self.crashlog.threads.append(self.thread)
+                    self.thread = None
                     # only append an extra empty line if the previous line
                     # in the info_lines wasn't empty
-                    if len(self.info_lines) > 0 and len(self.info_lines[-1]):
-                        self.info_lines.append(line)
-                parse_mode = CrashLogParseMode.NORMAL
-            elif parse_mode == CrashLogParseMode.NORMAL:
+                    if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
+                        self.crashlog.info_lines.append(line)
+                self.parse_mode = CrashLogParseMode.NORMAL
+            elif self.parse_mode == CrashLogParseMode.NORMAL:
                 if line.startswith('Process:'):
-                    (self.process_name, pid_with_brackets) = line[
+                    (self.crashlog.process_name, pid_with_brackets) = line[
                         8:].strip().split(' [')
-                    self.process_id = pid_with_brackets.strip('[]')
+                    self.crashlog.process_id = pid_with_brackets.strip('[]')
                 elif line.startswith('Path:'):
-                    self.process_path = line[5:].strip()
+                    self.crashlog.process_path = line[5:].strip()
                 elif line.startswith('Identifier:'):
-                    self.process_identifier = line[11:].strip()
+                    self.crashlog.process_identifier = line[11:].strip()
                 elif line.startswith('Version:'):
                     version_string = line[8:].strip()
                     matched_pair = re.search("(.+)\((.+)\)", version_string)
                     if matched_pair:
-                        self.process_version = matched_pair.group(1)
-                        self.process_compatability_version = matched_pair.group(
+                        self.crashlog.process_version = matched_pair.group(1)
+                        self.crashlog.process_compatability_version = matched_pair.group(
-                        self.process = version_string
-                        self.process_compatability_version = version_string
+                        self.crashlog.process = version_string
+                        self.crashlog.process_compatability_version = version_string
                 elif self.parent_process_regex.search(line):
                     parent_process_match = self.parent_process_regex.search(
-                    self.parent_process_name = parent_process_match.group(1)
-                    self.parent_process_id = parent_process_match.group(2)
+                    self.crashlog.parent_process_name = parent_process_match.group(1)
+                    self.crashlog.parent_process_id = parent_process_match.group(2)
                 elif line.startswith('Exception Type:'):
-                    self.thread_exception = line[15:].strip()
+                    self.crashlog.thread_exception = line[15:].strip()
                 elif line.startswith('Exception Codes:'):
-                    self.thread_exception_data = line[16:].strip()
+                    self.crashlog.thread_exception_data = line[16:].strip()
                 elif line.startswith('Exception Subtype:'): # iOS
-                    self.thread_exception_data = line[18:].strip()
+                    self.crashlog.thread_exception_data = line[18:].strip()
                 elif line.startswith('Crashed Thread:'):
-                    self.crashed_thread_idx = int(line[15:].strip().split()[0])
+                    self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
                 elif line.startswith('Triggered by Thread:'): # iOS
-                    self.crashed_thread_idx = int(line[20:].strip().split()[0])
+                    self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
                 elif line.startswith('Report Version:'):
-                    self.version = int(line[15:].strip())
+                    self.crashlog.version = int(line[15:].strip())
                 elif line.startswith('System Profile:'):
-                    parse_mode = CrashLogParseMode.SYSTEM
+                    self.parse_mode = CrashLogParseMode.SYSTEM
                 elif (line.startswith('Interval Since Last Report:') or
                       line.startswith('Crashes Since Last Report:') or
@@ -449,41 +501,41 @@ def __init__(self, path, verbose):
                 elif line.startswith('Thread'):
                     thread_state_match = self.thread_state_regex.search(line)
                     if thread_state_match:
-                        app_specific_backtrace = False
+                        self.app_specific_backtrace = False
                         thread_state_match = self.thread_regex.search(line)
                         thread_idx = int(thread_state_match.group(1))
-                        parse_mode = CrashLogParseMode.THREGS
-                        thread = self.threads[thread_idx]
+                        self.parse_mode = CrashLogParseMode.THREGS
+                        self.thread = self.crashlog.threads[thread_idx]
                     thread_insts_match  = self.thread_instrs_regex.search(line)
                     if thread_insts_match:
-                        parse_mode = CrashLogParseMode.INSTRS
+                        self.parse_mode = CrashLogParseMode.INSTRS
                     thread_match = self.thread_regex.search(line)
                     if thread_match:
-                        app_specific_backtrace = False
-                        parse_mode = CrashLogParseMode.THREAD
+                        self.app_specific_backtrace = False
+                        self.parse_mode = CrashLogParseMode.THREAD
                         thread_idx = int(thread_match.group(1))
-                        thread = CrashLog.Thread(thread_idx, False)
+                        self.thread = self.crashlog.Thread(thread_idx, False)
                 elif line.startswith('Binary Images:'):
-                    parse_mode = CrashLogParseMode.IMAGES
+                    self.parse_mode = CrashLogParseMode.IMAGES
                 elif line.startswith('Application Specific Backtrace'):
                     app_backtrace_match = self.app_backtrace_regex.search(line)
                     if app_backtrace_match:
-                        parse_mode = CrashLogParseMode.THREAD
-                        app_specific_backtrace = True
+                        self.parse_mode = CrashLogParseMode.THREAD
+                        self.app_specific_backtrace = True
                         idx = int(app_backtrace_match.group(1))
-                        thread = CrashLog.Thread(idx, True)
+                        self.thread = self.crashlog.Thread(idx, True)
                 elif line.startswith('Last Exception Backtrace:'): # iOS
-                    parse_mode = CrashLogParseMode.THREAD
-                    app_specific_backtrace = True
+                    self.parse_mode = CrashLogParseMode.THREAD
+                    self.app_specific_backtrace = True
                     idx = 1
-                    thread = CrashLog.Thread(idx, True)
-                self.info_lines.append(line.strip())
-            elif parse_mode == CrashLogParseMode.THREAD:
+                    self.thread = self.crashlog.Thread(idx, True)
+                self.crashlog.info_lines.append(line.strip())
+            elif self.parse_mode == CrashLogParseMode.THREAD:
                 if line.startswith('Thread'):
                 if self.null_frame_regex.search(line):
@@ -494,94 +546,43 @@ def __init__(self, path, verbose):
                     (frame_id, frame_img_name, _, frame_img_version, _,
                      frame_addr, frame_ofs) = frame_match.groups()
                     ident = frame_img_name
-                    thread.add_ident(ident)
-                    if ident not in self.idents:
-                        self.idents.append(ident)
-                    thread.frames.append(CrashLog.Frame(int(frame_id), int(
+                    self.thread.add_ident(ident)
+                    if ident not in self.crashlog.idents:
+                        self.crashlog.idents.append(ident)
+                    self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
                         frame_addr, 0), frame_ofs))
                     print('error: frame regex failed for line: "%s"' % line)
-            elif parse_mode == CrashLogParseMode.IMAGES:
+            elif self.parse_mode == CrashLogParseMode.IMAGES:
                 image_match = self.image_regex_uuid.search(line)
                 if image_match:
                     (img_lo, img_hi, img_name, _, img_version, _,
                      _, img_uuid, img_path) = image_match.groups()
-                    image = CrashLog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
+                    image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
                                                  if img_version else "",
                                                  uuid.UUID(img_uuid), img_path,
-                    self.images.append(image)
+                    self.crashlog.images.append(image)
                     print("error: image regex failed for: %s" % line)
-            elif parse_mode == CrashLogParseMode.THREGS:
+            elif self.parse_mode == CrashLogParseMode.THREGS:
                 stripped_line = line.strip()
                 # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
                 reg_values = re.findall(
                     '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
                 for reg_value in reg_values:
                     (reg, value) = reg_value.split(': ')
-                    thread.registers[reg.strip()] = int(value, 0)
-            elif parse_mode == CrashLogParseMode.SYSTEM:
-                self.system_profile.append(line)
-            elif parse_mode == CrashLogParseMode.INSTRS:
+                    self.thread.registers[reg.strip()] = int(value, 0)
+            elif self.parse_mode == CrashLogParseMode.SYSTEM:
+                self.crashlog.system_profile.append(line)
+            elif self.parse_mode == CrashLogParseMode.INSTRS:
-        f.close()
-    def dump(self):
-        print("Crash Log File: %s" % (self.path))
-        if self.backtraces:
-            print("\nApplication Specific Backtraces:")
-            for thread in self.backtraces:
-                thread.dump('  ')
-        print("\nThreads:")
-        for thread in self.threads:
-            thread.dump('  ')
-        print("\nImages:")
-        for image in self.images:
-            image.dump('  ')
+        return self.crashlog
-    def find_image_with_identifier(self, identifier):
-        for image in self.images:
-            if image.identifier == identifier:
-                return image
-        regex_text = '^.*\.%s$' % (re.escape(identifier))
-        regex = re.compile(regex_text)
-        for image in self.images:
-            if regex.match(image.identifier):
-                return image
-        return None
-    def create_target(self):
-        if self.target is None:
-            self.target = symbolication.Symbolicator.create_target(self)
-            if self.target:
-                return self.target
-            # We weren't able to open the main executable as, but we can still
-            # symbolicate
-            print('crashlog.create_target()...2')
-            if self.idents:
-                for ident in self.idents:
-                    image = self.find_image_with_identifier(ident)
-                    if image:
-                        self.target = image.create_target()
-                        if self.target:
-                            return self.target  # success
-            print('crashlog.create_target()...3')
-            for image in self.images:
-                self.target = image.create_target()
-                if self.target:
-                    return self.target  # success
-            print('crashlog.create_target()...4')
-            print('error: Unable to locate any executables from the crash log.')
-            print('       Try loading the executable into lldb before running crashlog')
-            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
-        return self.target
-    def get_target(self):
-        return self.target
 def usage():
@@ -702,9 +703,10 @@ def interactive_crashlogs(options, args):
     crash_logs = list()
     for crash_log_file in crash_log_files:
-        crash_log = CrashLog(crash_log_file, options.verbose)
-        if crash_log.error:
-            print(crash_log.error)
+        try:
+            crash_log = CrashLogParser(crash_log_file, options.verbose).parse()
+        except Exception as e:
+            print(e)
         if options.debug:
@@ -836,9 +838,6 @@ def Symbolicate(debugger, command, result, dict):
 def SymbolicateCrashLog(crash_log, options):
-    if crash_log.error:
-        print(crash_log.error)
-        return
     if options.debug:
     if not crash_log.images:
@@ -1040,7 +1039,8 @@ def SymbolicateCrashLogs(command_args):
             interactive_crashlogs(options, args)
             for crash_log_file in args:
-                crash_log = CrashLog(crash_log_file, options.verbose)
+                crash_log_parser = CrashLogParser(crash_log_file, options.verbose)
+                crash_log = crash_log_parser.parse()
                 SymbolicateCrashLog(crash_log, options)
 if __name__ == '__main__':
     # Create a new debugger instance
@@ -1052,4 +1052,3 @@ def SymbolicateCrashLogs(command_args):
         'command script add -f lldb.macosx.crashlog.Symbolicate crashlog')
         'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog')
-    print('"crashlog" and "save_crashlog" command installed, use the "--help" option for detailed help')

diff  --git a/lldb/test/Shell/ScriptInterpreter/Python/crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/crashlog.test
similarity index 91%
rename from lldb/test/Shell/ScriptInterpreter/Python/crashlog.test
rename to lldb/test/Shell/ScriptInterpreter/Python/Crashlog/crashlog.test
index 293d34514fdc..6ac9392c87c1 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/crashlog.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/crashlog.test
@@ -1,12 +1,11 @@
 #                                                                 -*- python -*-
-# REQUIRES: system-darwin
-# UNSUPPORTED: lldb-repro
-# DEBUG: cd %S/../../../../examples/python && cat %s | %lldb && false
-# RUN: cd %S/../../../../examples/python && cat %s | %lldb | FileCheck %s
+# DEBUG: cd %S/../../../../../examples/python && cat %s | %lldb && false
+# RUN: cd %S/../../../../../examples/python && cat %s | %lldb | FileCheck %s
 import crashlog
-cl = crashlog.CrashLog
+crash_log_parser = crashlog.CrashLogParser
+crash_log = crashlog.CrashLog
 images = [
 "0x10b60b000 - 0x10f707fff com.apple.LLDB.framework (1.1000.11.38.2 - 1000.11.38.2) <96E36F5C-1A83-39A1-8713-5FDD9701C3F1> /Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/LLDB",
 # CHECK: 0x10b60b000
@@ -120,7 +119,7 @@ print("SKIP BEYOND CHECKS")
 for image in images:
-    match = cl.image_regex_uuid.search(image)
+    match = crash_log_parser.image_regex_uuid.search(image)
     for group in match.groups():
@@ -128,7 +127,7 @@ print("FRAMES")
 for frame in frames:
-    match = cl.frame_regex.search(frame)
+    match = crash_log_parser.frame_regex.search(frame)
     for group in match.groups():

diff  --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive.test
new file mode 100644
index 000000000000..2690b7fa2122
--- /dev/null
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/interactive.test
@@ -0,0 +1,8 @@
+# RUN: echo "quit" | %S/../../../../../examples/python/crashlog.py -i %s 2>&1 | FileCheck %s
+# CHECK: 1 crash logs are loaded:
+# CHECK: [0] = {{.*}}interactive.test
+# CHECK: Interactive crashlogs prompt, type "help" to see a list of supported commands.
+Binary Images:
+       0x10ab87000 -        0x10abdafff +lldb (10.0.0) <87BD1384-BAE9-3625-A838-9D241CBAEF87> /Volumes/VOLUME/*/lldb
+       0x10ac45000 -        0x10ae94fff  com.apple.python3 (3.8.2 - 3.8.2) <20BC3FC4-CAAD-3002-ACDF-423A3188F24C> /Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/Python3

diff  --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
new file mode 100644
index 000000000000..417069653d68
--- /dev/null
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/lit.local.cfg
@@ -0,0 +1,5 @@
+if 'system-darwin' not in config.available_features:
+  config.unsupported = True
+if 'lldb-repro' in config.available_features:
+  config.unsupported = True


More information about the lldb-commits mailing list