[Lldb-commits] [lldb] 28d0c0c - [lldb] Tidy some regex in crashlog.py (NFC)

Dave Lee via lldb-commits lldb-commits at lists.llvm.org
Thu Aug 11 15:25:13 PDT 2022


Author: Dave Lee
Date: 2022-08-11T15:24:57-07:00
New Revision: 28d0c0c2c8e88d2b6599ee34c84eb58eed43a626

URL: https://github.com/llvm/llvm-project/commit/28d0c0c2c8e88d2b6599ee34c84eb58eed43a626
DIFF: https://github.com/llvm/llvm-project/commit/28d0c0c2c8e88d2b6599ee34c84eb58eed43a626.diff

LOG: [lldb] Tidy some regex in crashlog.py (NFC)

A spiritual follow-up to D131032. I noticed some regexes could be simplified.

This does some of the following:
1. Removes unused capture groups
2. Uses non-capturing `(?:...)` groups where grouping is needed but capturing isn't
3. Removes trailing `.*`
4. Uses `\d` over `[0-9]`
5. Uses raw strings
6. Uses `{N,}` to indicate N-or-more

Also improves the call site of a `re.findall`.

Differential Revision: https://reviews.llvm.org/D131305

Added: 
    

Modified: 
    lldb/examples/python/crashlog.py
    lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test

Removed: 
    


################################################################################
diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py
index 33f4c4f3b3680..ad0d70a669a85 100755
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -603,26 +603,26 @@ class CrashLogParseMode:
 
 
 class TextCrashLogParser(CrashLogParser):
-    parent_process_regex = re.compile('^Parent Process:\s*(.*)\[(\d+)\]')
-    thread_state_regex = re.compile('^Thread ([0-9]+) crashed with')
-    thread_instrs_regex = re.compile('^Thread ([0-9]+) instruction stream')
-    thread_regex = re.compile('^Thread ([0-9]+)([^:]*):(.*)')
-    app_backtrace_regex = re.compile('^Application Specific Backtrace ([0-9]+)([^:]*):(.*)')
-    version = r'(\(.+\)|(arm|x86_)[0-9a-z]+)\s+'
-    frame_regex = re.compile(r'^([0-9]+)' r'\s+'                # id
-                             r'(.+?)' r'\s+'                    # img_name
-                             r'(' +version+ r')?'               # img_version
-                             r'(0x[0-9a-fA-F]{7,})'             # addr (7 chars or more)
-                             r' +(.*)'                          # offs
+    parent_process_regex = re.compile(r'^Parent Process:\s*(.*)\[(\d+)\]')
+    thread_state_regex = re.compile(r'^Thread \d+ crashed with')
+    thread_instrs_regex = re.compile(r'^Thread \d+ instruction stream')
+    thread_regex = re.compile(r'^Thread (\d+).*:')
+    app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
+    version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
+    frame_regex = re.compile(r'^(\d+)\s+'              # id
+                             r'(.+?)\s+'               # img_name
+                             r'(?:' +version+ r'\s+)?' # img_version
+                             r'(0x[0-9a-fA-F]{7,})'    # addr (7 chars or more)
+                             r' +(.*)'                 # offs
                             )
-    null_frame_regex = re.compile(r'^([0-9]+)\s+\?\?\?\s+(0{7}0+) +(.*)')
-    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'            # img_lo
-                                  r'\s+' '-' r'\s+'              #   -
-                                  r'(0x[0-9a-fA-F]+)'     r'\s+' # img_hi
-                                  r'[+]?(.+?)'            r'\s+' # img_name
-                                  r'(' +version+ ')?'            # img_version
-                                  r'(<([-0-9a-fA-F]+)>\s+)?'     # img_uuid
-                                  r'(/.*)'                       # img_path
+    null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{7,} +')
+    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'          # img_lo
+                                  r'\s+-\s+'                   #   -
+                                  r'(0x[0-9a-fA-F]+)\s+'       # img_hi
+                                  r'[+]?(.+?)\s+'              # img_name
+                                  r'(?:(' +version+ r')\s+)?'  # img_version
+                                  r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid
+                                  r'(/.*)'                     # img_path
                                  )
 
     def __init__(self, debugger, path, verbose):
@@ -768,8 +768,8 @@ def parse_thread(self, line):
             return
         frame_match = self.frame_regex.search(line)
         if frame_match:
-            (frame_id, frame_img_name, _, frame_img_version, _,
-                frame_addr, frame_ofs) = frame_match.groups()
+            (frame_id, frame_img_name, frame_addr,
+                frame_ofs) = frame_match.groups()
             ident = frame_img_name
             self.thread.add_ident(ident)
             if ident not in self.crashlog.idents:
@@ -782,8 +782,8 @@ def parse_thread(self, line):
     def parse_images(self, line):
         image_match = self.image_regex_uuid.search(line)
         if image_match:
-            (img_lo, img_hi, img_name, _, img_version, _,
-                _, img_uuid, img_path) = image_match.groups()
+            (img_lo, img_hi, img_name, img_version,
+                img_uuid, img_path) = image_match.groups()
             image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
                                             img_name.strip(),
                                             img_version.strip()
@@ -796,13 +796,10 @@ def parse_images(self, line):
 
 
     def parse_thread_registers(self, line):
-        stripped_line = line.strip()
         # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
-        reg_values = re.findall(
-            '([a-zA-Z0-9]+: 0[Xx][0-9a-fA-F]+) *', stripped_line)
-        for reg_value in reg_values:
-            (reg, value) = reg_value.split(': ')
-            self.thread.registers[reg.strip()] = int(value, 0)
+        reg_values = re.findall('([a-z0-9]+): (0x[0-9a-f]+)', line, re.I)
+        for reg, value in reg_values:
+            self.thread.registers[reg] = int(value, 16)
 
     def parse_system(self, line):
         self.crashlog.system_profile.append(line)

diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test
index 7251d8541be10..af3f9ad48662b 100644
--- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test
+++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/parser_text.test
@@ -52,7 +52,6 @@ images = [
 # CHECK: MyFramework Plus.dylib
 # CHECK: ({{.*}}
 # CHECK: 1.11 - MyFramework 1.11
-# CHECK: <{{.*}}
 # CHECK: 01234
 # CHECK: /tmp/MyFramework Plus.dylib
 
@@ -62,7 +61,6 @@ images = [
 # CHECK: MyFramework-dev.dylib
 # CHECK: ({{.*}}
 # CHECK: 1.0.0svn - 1.0.0svn
-# CHECK: <{{.*}}
 # CHECK: 01234
 # CHECK: /MyFramework-dev.dylib
 
@@ -73,7 +71,6 @@ images = [
 # CHECK: ({{.*}}
 # CHECK: 400.9.4
 # CHECK: None
-# CHECK: None
 # CHECK: /usr/lib/libc++.1.dylib
 
 "0x1047b8000 - 0x10481ffff dyld arm64e  <cfa789d10da63f9a8996daf84ed9d04f> /usr/lib/dyld"
@@ -82,7 +79,6 @@ images = [
 # CHECK: dyld
 # CHECK: {{.*}}
 # CHECK: arm64e
-# CHECK: <{{.*}}
 # CHECK: cfa789d10da63f9a8996daf84ed9d04f
 # CHECK: /usr/lib/dyld
 ]
@@ -101,9 +97,6 @@ frames = [
 "2   MyApp Pro arm64    	0x000000019b0db3a8 foo + 72",
 # CHECK: 2
 # CHECK: MyApp Pro
-# CHECK: a
-# CHECK: arm64
-# CHECK: a
 # CHECK: 0x000000019b0db3a8
 # CHECK: foo + 72
 "3   He 0x1    	0x000000019b0db3a8 foo + 72"


        


More information about the lldb-commits mailing list