[Lldb-commits] [lldb] r154602 - in /lldb/trunk/examples/darwin/heap_find: heap.py heap_find.cpp

Greg Clayton gclayton at apple.com
Thu Apr 12 11:57:36 PDT 2012


Author: gclayton
Date: Thu Apr 12 13:57:36 2012
New Revision: 154602

URL: http://llvm.org/viewvc/llvm-project?rev=154602&view=rev
Log:
A few tweaks done to heap.py in my free time, where we now have:

(lldb) command script import heap.py

Find all malloc blocks that contain a pointer value of 0x1234000:
(lldb) ptr_refs 0x1234000

Find all malloc blocks that contain a C string:
(lldb) cstr_refs "hello"

Get info on a malloc block that starts at or contains 0x12340000:
(lldb) malloc_info 0x12340000


Modified:
    lldb/trunk/examples/darwin/heap_find/heap.py
    lldb/trunk/examples/darwin/heap_find/heap_find.cpp

Modified: lldb/trunk/examples/darwin/heap_find/heap.py
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/examples/darwin/heap_find/heap.py?rev=154602&r1=154601&r2=154602&view=diff
==============================================================================
--- lldb/trunk/examples/darwin/heap_find/heap.py (original)
+++ lldb/trunk/examples/darwin/heap_find/heap.py Thu Apr 12 13:57:36 2012
@@ -23,11 +23,16 @@
 
 def heap_search(options, arg_str):
     expr = None
+    arg_str_description = arg_str
     if options.type == 'pointer':
-        ptr = int(arg_str, 0)
-        expr = 'find_pointer_in_heap(0x%x)' % ptr
+        expr = 'find_pointer_in_heap(%s)' % arg_str
+        arg_str_description = 'malloc block containing pointer %s' % arg_str
     elif options.type == 'cstr':
         expr = 'find_cstring_in_heap("%s")' % arg_str
+        arg_str_description = 'malloc block containing "%s"' % arg_str
+    elif options.type == 'addr':
+        expr = 'find_block_for_address(%s)' % arg_str
+        arg_str_description = 'malloc block for %s' % arg_str
     else:
         print 'error: invalid type "%s"\nvalid values are "pointer", "cstr"' % options.type
         return
@@ -48,73 +53,81 @@
                 # If the type is still 'void *' then we weren't able to figure
                 # out a dynamic type for the malloc_addr
                 type_name = dynamic_value.type.name
+                description = '[%u] %s: addr = 0x%x' % (i, arg_str_description, malloc_addr)
+                if offset != 0:
+                    description += ' + %u' % (offset)
+                description += ', size = %u' % (malloc_size)
                 if type_name == 'void *':
                     if options.type == 'pointer' and malloc_size == 4096:
                         error = lldb.SBError()
                         data = bytearray(lldb.process.ReadMemory(malloc_addr, 16, error))
                         if data == '\xa1\xa1\xa1\xa1AUTORELEASE!':
-                            print 'found %s %s: block = 0x%x, size = %u, offset = %u, type = (autorelease object pool)' % (options.type, arg_str, malloc_addr, malloc_size, offset)
+                            description += ', type = (AUTORELEASE!)'
+                            print description
                             continue
-                
-                print 'found %s %s: block = 0x%x, size = %u, offset = %u, type = \'%s\'' % (options.type, arg_str, malloc_addr, malloc_size, offset, type_name),
-                derefed_dynamic_value = dynamic_value.deref
-                ivar_member = None
-                if derefed_dynamic_value:
-                    derefed_dynamic_type = derefed_dynamic_value.type
-                    member = derefed_dynamic_type.GetFieldAtIndex(0)
-                    search_bases = False
-                    if member:
-                        if member.GetOffsetInBytes() <= offset:
-                            for field_idx in range (derefed_dynamic_type.GetNumberOfFields()):
-                                member = derefed_dynamic_type.GetFieldAtIndex(field_idx)
-                                member_byte_offset = member.GetOffsetInBytes()
-                                if member_byte_offset == offset:
-                                    ivar_member = member
-                                    break
+                else:
+                    description += ', type = %s' % (type_name)
+                    derefed_dynamic_value = dynamic_value.deref
+                    ivar_member = None
+                    if derefed_dynamic_value:
+                        derefed_dynamic_type = derefed_dynamic_value.type
+                        member = derefed_dynamic_type.GetFieldAtIndex(0)
+                        search_bases = False
+                        if member:
+                            if member.GetOffsetInBytes() <= offset:
+                                for field_idx in range (derefed_dynamic_type.GetNumberOfFields()):
+                                    member = derefed_dynamic_type.GetFieldAtIndex(field_idx)
+                                    member_byte_offset = member.GetOffsetInBytes()
+                                    if member_byte_offset == offset:
+                                        ivar_member = member
+                                        break
+                            else:
+                                search_bases = True
                         else:
                             search_bases = True
-                    else:
-                        search_bases = True
 
-                    if not ivar_member and search_bases:
-                        for field_idx in range (derefed_dynamic_type.GetNumberOfDirectBaseClasses()):
-                            member = derefed_dynamic_type.GetDirectBaseClassAtIndex(field_idx)
-                            member_byte_offset = member.GetOffsetInBytes()
-                            if member_byte_offset == offset:
-                                ivar_member = member
-                                break
-                        if not ivar_member:
-                            for field_idx in range (derefed_dynamic_type.GetNumberOfVirtualBaseClasses()):
-                                member = derefed_dynamic_type.GetVirtualBaseClassAtIndex(field_idx)
+                        if not ivar_member and search_bases:
+                            for field_idx in range (derefed_dynamic_type.GetNumberOfDirectBaseClasses()):
+                                member = derefed_dynamic_type.GetDirectBaseClassAtIndex(field_idx)
                                 member_byte_offset = member.GetOffsetInBytes()
                                 if member_byte_offset == offset:
                                     ivar_member = member
                                     break
+                            if not ivar_member:
+                                for field_idx in range (derefed_dynamic_type.GetNumberOfVirtualBaseClasses()):
+                                    member = derefed_dynamic_type.GetVirtualBaseClassAtIndex(field_idx)
+                                    member_byte_offset = member.GetOffsetInBytes()
+                                    if member_byte_offset == offset:
+                                        ivar_member = member
+                                        break
                     if ivar_member:
-                        print ", ivar = %s" % ivar_member.name,
-                    print "\n", dynamic_value.deref
-                else:
-                    print
-                if options.print_object_description:
-                    desc = dynamic_value.GetObjectDescription()
-                    if desc:
-                        print '  (%s) 0x%x %s\n' % (type_name, malloc_addr, desc)
+                        description +=', ivar = %s' % (ivar_member.name)
+
+                    print description
+                    if derefed_dynamic_value:
+                        print derefed_dynamic_value
+                    if options.print_object_description:
+                        desc = dynamic_value.GetObjectDescription()
+                        if desc:
+                            print '  (%s) 0x%x %s\n' % (type_name, malloc_addr, desc)
         else:
             print '%s %s was not found in any malloc blocks' % (options.type, arg_str)
     else:
-        print expr_sbvalue.error        
+        print expr_sbvalue.error
+    print     
     
-def heap_ptr_refs(debugger, command, result, dict):
+def ptr_refs(debugger, command, result, dict):
     command_args = shlex.split(command)
-    usage = "usage: %prog [options] <PATH> [PATH ...]"
+    usage = "usage: %prog [options] <PTR> [PTR ...]"
     description='''Searches the heap for pointer references on darwin user space programs. 
     
     Any matches that were found will dump the malloc blocks that contain the pointers 
     and might be able to print what kind of objects the pointers are contained in using 
-    dynamic type information from the program.'''
-    parser = optparse.OptionParser(description=description, prog='heap_ptr_refs',usage=usage)
+    dynamic type information in the program.'''
+    parser = optparse.OptionParser(description=description, prog='ptr_refs',usage=usage)
     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
     parser.add_option('-o', '--po', action='store_true', dest='print_object_description', help='print the object descriptions for any matches', default=False)
+    parser.add_option('-m', '--memory', action='store_true', dest='show_memory', help='dump the memory for each matching block', default=False)
     try:
         (options, args) = parser.parse_args(command_args)
     except:
@@ -129,17 +142,18 @@
     else:
         print 'error: no pointer arguments were given'
 
-def heap_cstr_refs(debugger, command, result, dict):
+def cstr_refs(debugger, command, result, dict):
     command_args = shlex.split(command)
-    usage = "usage: %prog [options] <PATH> [PATH ...]"
+    usage = "usage: %prog [options] <CSTR> [CSTR ...]"
     description='''Searches the heap for C string references on darwin user space programs. 
     
     Any matches that were found will dump the malloc blocks that contain the C strings 
     and might be able to print what kind of objects the pointers are contained in using 
-    dynamic type information from the program.'''
-    parser = optparse.OptionParser(description=description, prog='heap_cstr_refs',usage=usage)
+    dynamic type information in the program.'''
+    parser = optparse.OptionParser(description=description, prog='cstr_refs',usage=usage)
     parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
     parser.add_option('-o', '--po', action='store_true', dest='print_object_description', help='print the object descriptions for any matches', default=False)
+    parser.add_option('-m', '--memory', action='store_true', dest='show_memory', help='dump the memory for each matching block', default=False)
     try:
         (options, args) = parser.parse_args(command_args)
     except:
@@ -154,14 +168,41 @@
     else:
         print 'error: no c string arguments were given to search for'
 
+def malloc_info(debugger, command, result, dict):
+    command_args = shlex.split(command)
+    usage = "usage: %prog [options] <ADDR> [ADDR ...]"
+    description='''Searches the heap a malloc block that contains the addresses specified as arguments. 
+
+    Any matches that were found will dump the malloc blocks that match or contain
+    the specified address. The matching blocks might be able to show what kind 
+    of objects they are using dynamic type information in the program.'''
+    parser = optparse.OptionParser(description=description, prog='cstr_refs',usage=usage)
+    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', help='display verbose debug info', default=False)
+    parser.add_option('-o', '--po', action='store_true', dest='print_object_description', help='print the object descriptions for any matches', default=False)
+    parser.add_option('-m', '--memory', action='store_true', dest='show_memory', help='dump the memory for each matching block', default=False)
+    try:
+        (options, args) = parser.parse_args(command_args)
+    except:
+        return
+
+    options.type = 'addr'
+
+    if args:
+
+        for data in args:
+            heap_search (options, data)
+    else:
+        print 'error: no c string arguments were given to search for'
+
 def __lldb_init_module (debugger, dict):
     # This initializer is being run from LLDB in the embedded command interpreter
     # Add any commands contained in this module to LLDB
     libheap_dylib_path = os.path.dirname(__file__) + '/libheap.dylib'
     debugger.HandleCommand('process load "%s"' % libheap_dylib_path)
-    debugger.HandleCommand('command script add -f heap.heap_ptr_refs heap_ptr_refs')
-    debugger.HandleCommand('command script add -f heap.heap_cstr_refs heap_cstr_refs')
-    print '"heap_ptr_refs" and "heap_cstr_refs" commands have been installed, use the "--help" options on these commands for detailed help.'
+    debugger.HandleCommand('command script add -f heap.ptr_refs ptr_refs')
+    debugger.HandleCommand('command script add -f heap.cstr_refs cstr_refs')
+    debugger.HandleCommand('command script add -f heap.malloc_info malloc_info')
+    print '"ptr_refs", "cstr_refs", and "malloc_info" commands have been installed, use the "--help" options on these commands for detailed help.'
 
 
 

Modified: lldb/trunk/examples/darwin/heap_find/heap_find.cpp
URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/examples/darwin/heap_find/heap_find.cpp?rev=154602&r1=154601&r2=154602&view=diff
==============================================================================
--- lldb/trunk/examples/darwin/heap_find/heap_find.cpp (original)
+++ lldb/trunk/examples/darwin/heap_find/heap_find.cpp Thu Apr 12 13:57:36 2012
@@ -73,8 +73,6 @@
 #include <stdlib.h>
 #include <vector>
 
-struct range_callback_info_t;
-
 typedef void range_callback_t (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size);
 typedef void zone_callback_t (void *info, const malloc_zone_t *zone);
 
@@ -87,18 +85,28 @@
 
 enum data_type_t
 {
-    eDataTypeBytes,
-    eDataTypeCStr,
-    eDataTypeInteger
+    eDataTypeAddress,
+    eDataTypeContainsData
+};
+
+struct aligned_data_t
+{
+    const uint8_t *buffer;
+    uint32_t size;
+    uint32_t align;
 };
 
 struct range_contains_data_callback_info_t
 {
-    const uint8_t *data;
-    const size_t data_len;
-    const uint32_t align;
-    const data_type_t data_type;
+    data_type_t type;
+    const void *lookup_addr;
+    union
+    {
+        uintptr_t addr;
+        aligned_data_t data;
+    };
     uint32_t match_count;
+    bool done;
 };
 
 struct malloc_match
@@ -109,7 +117,15 @@
 };
 
 std::vector<malloc_match> g_matches;
+const void *g_lookup_addr = 0;
 
+//----------------------------------------------------------------------
+// task_peek
+//
+// Reads memory from this tasks address space. This callback is needed
+// by the code that iterates through all of the malloc blocks to read
+// the memory in this process.
+//----------------------------------------------------------------------
 static kern_return_t
 task_peek (task_t task, vm_address_t remote_address, vm_size_t size, void **local_memory)
 {
@@ -121,7 +137,6 @@
 static const void
 foreach_zone_in_this_process (range_callback_info_t *info)
 {
-    //printf ("foreach_zone_in_this_process ( info->zone_callback = %p, info->range_callback = %p, info->baton = %p)", info->zone_callback, info->range_callback, info->baton);
     if (info == NULL || info->zone_callback == NULL)
         return;
 
@@ -138,8 +153,14 @@
     }
 }
 
+//----------------------------------------------------------------------
+// dump_malloc_block_callback
+//
+// A simple callback that will dump each malloc block and all available
+// info from the enumeration callback perpective.
+//----------------------------------------------------------------------
 static void
-range_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
+dump_malloc_block_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
 {
     printf ("task = 0x%4.4x: baton = %p, type = %u, ptr_addr = 0x%llx + 0x%llu\n", task, baton, type, ptr_addr, ptr_size);
 }
@@ -168,89 +189,77 @@
                                       ranges_callback);    
 }
 
-const void
-foreach_range_in_this_process (range_callback_t *callback, void *baton)
-{
-    range_callback_info_t info = { enumerate_range_in_zone, callback ? callback : range_callback, baton };
-    foreach_zone_in_this_process (&info);
-}
-
-
 static void
-range_contains_ptr_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
+range_info_callback (task_t task, void *baton, unsigned type, uint64_t ptr_addr, uint64_t ptr_size)
 {
-    uint8_t *data = NULL;
-    range_contains_data_callback_info_t *data_info = (range_contains_data_callback_info_t *)baton;
-    if (data_info->data_len <= 0)
+    const uint64_t end_addr = ptr_addr + ptr_size;
+    
+    range_contains_data_callback_info_t *info = (range_contains_data_callback_info_t *)baton;
+    switch (info->type)
     {
-        printf ("error: invalid data size: %zu\n", data_info->data_len);
-    }
-    else if (data_info->data_len > ptr_size)
-    {
-        // This block is too short to contain the data we are looking for...
-        return;
-    }
-    else if (task_peek (task, ptr_addr, ptr_size, (void **)&data) == KERN_SUCCESS)
-    {
-        assert (data);
-        const uint64_t end_addr = ptr_addr + ptr_size;
-        for (uint64_t addr = ptr_addr; 
-             addr < end_addr && ((end_addr - addr) >= data_info->data_len);
-             addr += data_info->align, data += data_info->align)
+    case eDataTypeAddress:
+        if (ptr_addr <= info->addr && info->addr < end_addr)
         {
-            if (memcmp (data_info->data, data, data_info->data_len) == 0)
+            ++info->match_count;
+            malloc_match match = { (void *)ptr_addr, ptr_size, info->addr - ptr_addr };
+            g_matches.push_back(match);            
+        }
+        break;
+    
+    case eDataTypeContainsData:
+        {
+            const uint32_t size = info->data.size;
+            if (size < ptr_size) // Make sure this block can contain this data
             {
-                ++data_info->match_count;
-                malloc_match match = { (void *)ptr_addr, ptr_size, addr - ptr_addr };
-                g_matches.push_back(match);
-                // printf ("0x%llx: ", addr);
-                // uint32_t i;
-                // switch (data_info->data_type)
-                // {
-                // case eDataTypeInteger:
-                //     {
-                //         // NOTE: little endian specific, but all darwin platforms are little endian now..
-                //         for (i=0; i<data_info->data_len; ++i)
-                //             printf (i ? "%2.2x" : "0x%2.2x", data[data_info->data_len - (i + 1)]);
-                //     }
-                //     break;
-                // case eDataTypeBytes:
-                //     {
-                //         for (i=0; i<data_info->data_len; ++i)
-                //             printf (" %2.2x", data[i]);
-                //     }
-                //     break;
-                // case eDataTypeCStr:
-                //     {
-                //         putchar ('"');
-                //         for (i=0; i<data_info->data_len; ++i)
-                //         {
-                //             if (isprint (data[i]))
-                //                 putchar (data[i]);
-                //             else
-                //                 printf ("\\x%2.2x", data[i]);
-                //         }
-                //         putchar ('"');
-                //     }
-                //     break;
-                //     
-                // }
-                // printf (" found in malloc block 0x%llx + %llu (malloc_size = %llu)\n", ptr_addr, addr - ptr_addr, ptr_size);
+                uint8_t *ptr_data = NULL;
+                if (task_peek (task, ptr_addr, ptr_size, (void **)&ptr_data) == KERN_SUCCESS)
+                {
+                    const void *buffer = info->data.buffer;
+                    assert (ptr_data);
+                    const uint32_t align = info->data.align;
+                    for (uint64_t addr = ptr_addr; 
+                         addr < end_addr && ((end_addr - addr) >= size);
+                         addr += align, ptr_data += align)
+                    {
+                        if (memcmp (buffer, ptr_data, size) == 0)
+                        {
+                            ++info->match_count;
+                            malloc_match match = { (void *)ptr_addr, ptr_size, addr - ptr_addr };
+                            g_matches.push_back(match);
+                        }
+                    }
+                }
+                else
+                {
+                    printf ("0x%llx: error: couldn't read %llu bytes\n", ptr_addr, ptr_size);
+                }   
             }
         }
+        break;
     }
-    else
-    {
-        printf ("0x%llx: error: couldn't read %llu bytes\n", ptr_addr, ptr_size);
-    }   
 }
 
+//----------------------------------------------------------------------
+// find_pointer_in_heap
+//
+// Finds a pointer value inside one or more currently valid malloc
+// blocks.
+//----------------------------------------------------------------------
 malloc_match *
-find_pointer_in_heap (intptr_t addr)
+find_pointer_in_heap (const void * addr)
 {
     g_matches.clear();
-    range_contains_data_callback_info_t data_info = { (uint8_t *)&addr, sizeof(addr), sizeof(addr), eDataTypeInteger, 0};
-    range_callback_info_t info = { enumerate_range_in_zone, range_contains_ptr_callback, &data_info };
+    // Setup "info" to look for a malloc block that contains data
+    // that is the a pointer 
+    range_contains_data_callback_info_t data_info;
+    data_info.type = eDataTypeContainsData;      // Check each block for data
+    g_lookup_addr = addr;
+    data_info.data.buffer = (uint8_t *)&addr;    // What data? The pointer value passed in
+    data_info.data.size = sizeof(addr);          // How many bytes? The byte size of a pointer
+    data_info.data.align = sizeof(addr);         // Align to a pointer byte size
+    data_info.match_count = 0;                   // Initialize the match count to zero
+    data_info.done = false;                      // Set done to false so searching doesn't stop
+    range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
     foreach_zone_in_this_process (&info);
     if (g_matches.empty())
         return NULL;
@@ -260,24 +269,61 @@
 }
 
 
+//----------------------------------------------------------------------
+// find_cstring_in_heap
+//
+// Finds a C string inside one or more currently valid malloc blocks.
+//----------------------------------------------------------------------
 malloc_match *
 find_cstring_in_heap (const char *s)
 {
-    if (s && s[0])
-    {
-        g_matches.clear();
-        range_contains_data_callback_info_t data_info = { (uint8_t *)s, strlen(s), 1, eDataTypeCStr, 0};
-        range_callback_info_t info = { enumerate_range_in_zone, range_contains_ptr_callback, &data_info };
-        foreach_zone_in_this_process (&info);
-        if (g_matches.empty())
-            return NULL;
-        malloc_match match = { NULL, 0, 0 };
-        g_matches.push_back(match);
-        return g_matches.data();
-    }
-    else
+    g_matches.clear();
+    if (s == NULL || s[0] == '\0')
     {
         printf ("error: invalid argument (empty cstring)\n");
+        return NULL;
     }
-    return 0;
+    // Setup "info" to look for a malloc block that contains data
+    // that is the C string passed in aligned on a 1 byte boundary
+    range_contains_data_callback_info_t data_info;
+    data_info.type = eDataTypeContainsData;  // Check each block for data
+    g_lookup_addr = s;               // If an expression was used, then fill in the resolved address we are looking up
+    data_info.data.buffer = (uint8_t *)s;    // What data? The C string passed in
+    data_info.data.size = strlen(s);         // How many bytes? The length of the C string
+    data_info.data.align = 1;                // Data doesn't need to be aligned, so set the alignment to 1
+    data_info.match_count = 0;               // Initialize the match count to zero
+    data_info.done = false;                  // Set done to false so searching doesn't stop
+    range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
+    foreach_zone_in_this_process (&info);
+    if (g_matches.empty())
+        return NULL;
+    malloc_match match = { NULL, 0, 0 };
+    g_matches.push_back(match);
+    return g_matches.data();
+}
+
+//----------------------------------------------------------------------
+// find_block_for_address
+//
+// Find the malloc block that whose address range contains "addr".
+//----------------------------------------------------------------------
+malloc_match *
+find_block_for_address (const void *addr)
+{
+    g_matches.clear();
+    // Setup "info" to look for a malloc block that contains data
+    // that is the C string passed in aligned on a 1 byte boundary
+    range_contains_data_callback_info_t data_info;
+    g_lookup_addr = addr;               // If an expression was used, then fill in the resolved address we are looking up
+    data_info.type = eDataTypeAddress;  // Check each block to see if the block contains the address passed in
+    data_info.addr = (uintptr_t)addr;   // What data? The C string passed in
+    data_info.match_count = 0;          // Initialize the match count to zero
+    data_info.done = false;             // Set done to false so searching doesn't stop
+    range_callback_info_t info = { enumerate_range_in_zone, range_info_callback, &data_info };
+    foreach_zone_in_this_process (&info);
+    if (g_matches.empty())
+        return NULL;
+    malloc_match match = { NULL, 0, 0 };
+    g_matches.push_back(match);
+    return g_matches.data();
 }





More information about the lldb-commits mailing list