[Lldb-commits] [lldb] 8f75c4d - [lldb/crashlog] Make TextCrashLogParser more resilient to new lines

Sat Aug 12 00:00:18 PDT 2023

Author: Med Ismail Bennani
Date: 2023-08-11T23:59:42-07:00
New Revision: 8f75c4d01eff3c65d7ae40bfd05582de7dffa590

URL: https://github.com/llvm/llvm-project/commit/8f75c4d01eff3c65d7ae40bfd05582de7dffa590
DIFF: https://github.com/llvm/llvm-project/commit/8f75c4d01eff3c65d7ae40bfd05582de7dffa590.diff

LOG: [lldb/crashlog] Make TextCrashLogParser more resilient to new lines

This patch changes the parsing logic for the legacy crash report format
so that parsing is not interrupted when empty lines appear in the middle
of a section.

To do so, the parser starts by skipping all consecutive empty lines. If
the number of lines skipped is greater than 1, the parser considers that
it has reached a new section of the report and resets the parsing mode
back to normal.

Otherwise, it tries to parse the next line in the current parsing mode.
If it succeeds, the parser also skips that line, since it has already
been parsed, and continues parsing from the line after it.

rdar://107022595

Differential Revision: https://reviews.llvm.org/D157043

Signed-off-by: Med Ismail Bennani <ismail at bennani.ma>

Added: 
    

Modified: 
    lldb/examples/python/crashlog.py

Removed: 
    


################################################################################
diff  --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
index ccf3fb1aa9146a..7f1a43b435689f 100755

--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -537,21 +537,21 @@ class InteractiveCrashLogException(Exception):
 
 class CrashLogParser:
     @staticmethod
-    def create(debugger, path, verbose):
+    def create(debugger, path, options):
         data = JSONCrashLogParser.is_valid_json(path)
         if data:
-            parser = JSONCrashLogParser(debugger, path, verbose)
+            parser = JSONCrashLogParser(debugger, path, options)
             parser.data = data
             return parser
         else:
-            return TextCrashLogParser(debugger, path, verbose)
+            return TextCrashLogParser(debugger, path, options)
 
-    def __init__(self, debugger, path, verbose):
+    def __init__(self, debugger, path, options):
         self.path = os.path.expanduser(path)
-        self.verbose = verbose
+        self.options = options
         # List of DarwinImages sorted by their index.
         self.images = list()
-        self.crashlog = CrashLog(debugger, self.path, self.verbose)
+        self.crashlog = CrashLog(debugger, self.path, self.options.verbose)
 
     @abc.abstractmethod
     def parse(self):
@@ -577,8 +577,8 @@ def parse_json(buffer):
         except:
             return None
 
-    def __init__(self, debugger, path, verbose):
-        super().__init__(debugger, path, verbose)
+    def __init__(self, debugger, path, options):
+        super().__init__(debugger, path, options)
 
     def parse(self):
         try:
@@ -639,7 +639,7 @@ def parse_images(self, json_images):
             path = json_image["path"] if "path" in json_image else ""
             version = ""
             darwin_image = self.crashlog.DarwinImage(
-                low, high, name, version, img_uuid, path, self.verbose
+                low, high, name, version, img_uuid, path, self.options.verbose
             )
             if "arch" in json_image:
                 darwin_image.arch = json_image["arch"]
@@ -898,8 +898,8 @@ def get(cls):
     )
     exception_extra_regex = re.compile(r"^Exception\s+.*:\s+(.*)")
 
-    def __init__(self, debugger, path, verbose):
-        super().__init__(debugger, path, verbose)
+    def __init__(self, debugger, path, options):
+        super().__init__(debugger, path, options)
         self.thread = None
         self.app_specific_backtrace = False
         self.parse_mode = CrashLogParseMode.NORMAL
@@ -917,8 +917,15 @@ def parse(self):
         with open(self.path, "r", encoding="utf-8") as f:
             lines = f.read().splitlines()
 
-        for line in lines:
+        idx = 0
+        lines_count = len(lines)
+        while True:
+            if idx >= lines_count:
+                break
+
+            line = lines[idx]
             line_len = len(line)
+
             if line_len == 0:
                 if self.thread:
                     if self.parse_mode == CrashLogParseMode.THREAD:
@@ -935,22 +942,36 @@ def parse(self):
                         else:
                             self.crashlog.threads.append(self.thread)
                     self.thread = None
-                else:
-                    # only append an extra empty line if the previous line
-                    # in the info_lines wasn't empty
-                    if len(self.crashlog.info_lines) > 0 and len(
-                        self.crashlog.info_lines[-1]
-                    ):
-                        self.crashlog.info_lines.append(line)
+
+                empty_lines = 1
+                while (
+                    idx + empty_lines < lines_count
+                    and len(lines[idx + empty_lines]) == 0
+                ):
+                    empty_lines = empty_lines + 1
+
+                if (
+                    empty_lines == 1
+                    and idx + empty_lines < lines_count - 1
+                    and self.parse_mode != CrashLogParseMode.NORMAL
+                ):
+                    # check if next line can be parsed with the current parse mode
+                    next_line_idx = idx + empty_lines
+                    if self.parsers[self.parse_mode](lines[next_line_idx]):
+                        # If that suceeded, skip the empty line and the next line.
+                        idx = next_line_idx + 1
+                        continue
                 self.parse_mode = CrashLogParseMode.NORMAL
-            else:
-                self.parsers[self.parse_mode](line)
+
+            self.parsers[self.parse_mode](line)
+
+            idx = idx + 1
 
         return self.crashlog
 
     def parse_exception(self, line):
         if not line.startswith("Exception"):
-            return
+            return False
         if line.startswith("Exception Type:"):
             self.crashlog.thread_exception = line[15:].strip()
             exception_type_match = self.exception_type_regex.search(line)
@@ -968,7 +989,7 @@ def parse_exception(self, line):
         elif line.startswith("Exception Codes:"):
             self.crashlog.thread_exception_data = line[16:].strip()
             if "type" not in self.crashlog.exception:
-                return
+                return False
             exception_codes_match = self.exception_codes_regex.search(line)
             if exception_codes_match:
                 self.crashlog.exception["codes"] = self.crashlog.thread_exception_data
@@ -979,10 +1000,11 @@ def parse_exception(self, line):
                 ]
         else:
             if "type" not in self.crashlog.exception:
-                return
+                return False
             exception_extra_match = self.exception_extra_regex.search(line)
             if exception_extra_match:
                 self.crashlog.exception["message"] = exception_extra_match.group(1)
+        return True
 
     def parse_normal(self, line):
         if line.startswith("Process:"):
@@ -1081,14 +1103,14 @@ def parse_normal(self, line):
 
     def parse_thread(self, line):
         if line.startswith("Thread"):
-            return
+            return False
         if self.null_frame_regex.search(line):
             print('warning: thread parser ignored null-frame: "%s"' % line)
-            return
+            return False
         frame_match = self.frame_regex.search(line)
         if not frame_match:
             print('error: frame regex failed for line: "%s"' % line)
-            return
+            return False
 
         frame_id = (
             frame_img_name
@@ -1155,6 +1177,8 @@ def parse_thread(self, line):
             self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
         )
 
+        return True
+
     def parse_images(self, line):
         image_match = self.image_regex_uuid.search(line)
         if image_match:
@@ -1174,7 +1198,7 @@ def parse_images(self, line):
                 img_version.strip() if img_version else "",
                 uuid.UUID(img_uuid),
                 img_path,
-                self.verbose,
+                self.options.verbose,
             )
             unqualified_img_name = os.path.basename(img_path)
             if unqualified_img_name in self.symbols:
@@ -1188,17 +1212,22 @@ def parse_images(self, line):
 
             self.images.append(image)
             self.crashlog.images.append(image)
+            return True
         else:
-            print("error: image regex failed for: %s" % line)
+            if self.options.debug:
+                print("error: image regex failed for: %s" % line)
+            return False
 
     def parse_thread_registers(self, line):
         # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
         reg_values = re.findall("([a-z0-9]+): (0x[0-9a-f]+)", line, re.I)
         for reg, value in reg_values:
             self.thread.registers[reg] = int(value, 16)
+        return len(reg_values) != 0
 
     def parse_system(self, line):
         self.crashlog.system_profile.append(line)
+        return True
 
     def parse_instructions(self, line):
         pass
@@ -1412,7 +1441,7 @@ def add_module(image, target, obj_dir):
 
 
 def load_crashlog_in_scripted_process(debugger, crashlog_path, options, result):
-    crashlog = CrashLogParser.create(debugger, crashlog_path, False).parse()
+    crashlog = CrashLogParser.create(debugger, crashlog_path, options).parse()
 
     target = lldb.SBTarget()
     # 1. Try to use the user-provided target
@@ -1735,7 +1764,7 @@ def should_run_in_interactive_mode(options, ci):
                     result.SetError(str(e))
             else:
                 crash_log = CrashLogParser.create(
-                    debugger, crashlog_path, options.verbose
+                    debugger, crashlog_path, options
                 ).parse()
                 SymbolicateCrashLog(crash_log, options)