[Lldb-commits] [PATCH] D131305: [lldb] Tidy some regex in crashlog.py (NFC)

Fri Aug 5 16:26:39 PDT 2022

kastiglione created this revision.
kastiglione added reviewers: JDevlieghere, mib.
Herald added a project: All.
kastiglione requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.

A spiritual follow-up to D131032 <https://reviews.llvm.org/D131032>. I noticed that some of the regexes could be simplified.

This patch applies the following kinds of simplification, as appropriate for each regex:

1. Removes unused capture groups
2. Uses non-capturing `(?:...)` groups where grouping is needed but capturing isn't
3. Removes trailing `.*`
4. Uses `\d` over `[0-9]`
5. Uses raw strings
6. Uses `{N,}` to indicate N-or-more

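Also improves the `re.findall` call site in `parse_thread_registers`: the regex now captures the register name and value as separate groups, so the results no longer need to be stripped and split by hand.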


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D131305

Files:
  lldb/examples/python/crashlog.py


Index: lldb/examples/python/crashlog.py
===================================================================

--- lldb/examples/python/crashlog.py
+++ lldb/examples/python/crashlog.py
@@ -592,26 +592,26 @@
 
 
 class TextCrashLogParser:
-    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
-    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
-    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
-    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
-    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
-    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
-    frame_regex = re.compile(r'^([0-9]+)' r'\s'                # id
-                             r'+(.+?)'    r'\s+'               # img_name
-                             r'(' +version+ r')?'              # img_version
-                             r'(0x[0-9a-fA-F]{7}[0-9a-fA-F]+)' # addr
-                             r' +(.*)'                         # offs
+    parent_process_regex = re.compile(r'^Parent Process:\s*(.*)\[(\d+)\]')
+    thread_state_regex = re.compile(r'^Thread \d+ crashed with')
+    thread_instrs_regex = re.compile(r'^Thread \d+ instruction stream')
+    thread_regex = re.compile(r'^Thread (\d+).*:')
+    app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
+    version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
+    frame_regex = re.compile(r'^(\d+)\s+'              # id
+                             r'(.+?)\s+'               # img_name
+                             r'(?:' +version+ r'\s+)?' # img_version
+                             r'(0x[0-9a-fA-F]{7,})'    # addr
+                             r' +(.*)'                 # offs
                             )
-    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
-    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
-                                  r'\s+' '-' r'\s+'              #   -
-                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
-                                  r'[+]?(.+?)'            r'\s+' # img_name
-                                  r'(' +version+ ')?'            # img_version
-                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
-                                  r'(/.*)'                       # img_path
+    null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{7,} +')
+    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'          # img_lo
+                                  r'\s+-\s+'                   #   -
+                                  r'(0x[0-9a-fA-F]+)\s+'       # img_hi
+                                  r'[+]?(.+?)\s+'              # img_name
+                                  r'(?:(' +version+ r')\s+)?'  # img_version
+                                  r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid
+                                  r'(/.*)'                     # img_path
                                  )
 
 
@@ -762,8 +762,8 @@
             return
         frame_match = self.frame_regex.search(line)
         if frame_match:
-            (frame_id, frame_img_name, _, frame_img_version, _,
-                frame_addr, frame_ofs) = frame_match.groups()
+            (frame_id, frame_img_name, frame_addr,
+                frame_ofs) = frame_match.groups()
             ident = frame_img_name
             self.thread.add_ident(ident)
             if ident not in self.crashlog.idents:
@@ -776,8 +776,8 @@
     def parse_images(self, line):
         image_match = self.image_regex_uuid.search(line)
         if image_match:
-            (img_lo, img_hi, img_name, _, img_version, _,
-                _, img_uuid, img_path) = image_match.groups()
+            (img_lo, img_hi, img_name, img_version,
+                img_uuid, img_path) = image_match.groups()
             image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
                                             img_name.strip(),
                                             img_version.strip()
@@ -790,13 +790,10 @@
 
 
     def parse_thread_registers(self, line):
-        stripped_line = line.strip()
         # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
-        reg_values = re.findall(
-            '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
-        for reg_value in reg_values:
-            (reg, value) = reg_value.split(': ')
-            self.thread.registers[reg.strip()] = int(value, 0)
+        reg_values = re.findall('([a-z0-9]+): (0x[0-9a-f]+)', line, re.I)
+        for reg, value in reg_values:
+            self.thread.registers[reg] = int(value, 16)
 
     def parse_system(self, line):
         self.crashlog.system_profile.append(line)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D131305.450446.patch
Type: text/x-patch
Size: 4801 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/lldb-commits/attachments/20220805/409d1dbe/attachment.bin>